parse-chunked.js (13331B)
1 const { isReadableStream } = require('./utils'); 2 const TextDecoder = require('./text-decoder'); 3 4 const STACK_OBJECT = 1; 5 const STACK_ARRAY = 2; 6 const decoder = new TextDecoder(); 7 8 function isObject(value) { 9 return value !== null && typeof value === 'object'; 10 } 11 12 function adjustPosition(error, parser) { 13 if (error.name === 'SyntaxError' && parser.jsonParseOffset) { 14 error.message = error.message.replace(/at position (\d+)/, (_, pos) => 15 'at position ' + (Number(pos) + parser.jsonParseOffset) 16 ); 17 } 18 19 return error; 20 } 21 22 function append(array, elements) { 23 // Note: Avoid to use array.push(...elements) since it may lead to 24 // "RangeError: Maximum call stack size exceeded" for a long arrays 25 const initialLength = array.length; 26 array.length += elements.length; 27 28 for (let i = 0; i < elements.length; i++) { 29 array[initialLength + i] = elements[i]; 30 } 31 } 32 33 module.exports = function(chunkEmitter) { 34 let parser = new ChunkParser(); 35 36 if (isObject(chunkEmitter) && isReadableStream(chunkEmitter)) { 37 return new Promise((resolve, reject) => { 38 chunkEmitter 39 .on('data', chunk => { 40 try { 41 parser.push(chunk); 42 } catch (e) { 43 reject(adjustPosition(e, parser)); 44 parser = null; 45 } 46 }) 47 .on('error', (e) => { 48 parser = null; 49 reject(e); 50 }) 51 .on('end', () => { 52 try { 53 resolve(parser.finish()); 54 } catch (e) { 55 reject(adjustPosition(e, parser)); 56 } finally { 57 parser = null; 58 } 59 }); 60 }); 61 } 62 63 if (typeof chunkEmitter === 'function') { 64 const iterator = chunkEmitter(); 65 66 if (isObject(iterator) && (Symbol.iterator in iterator || Symbol.asyncIterator in iterator)) { 67 return new Promise(async (resolve, reject) => { 68 try { 69 for await (const chunk of iterator) { 70 parser.push(chunk); 71 } 72 73 resolve(parser.finish()); 74 } catch (e) { 75 reject(adjustPosition(e, parser)); 76 } finally { 77 parser = null; 78 } 79 }); 80 } 81 } 82 83 throw new Error( 84 'Chunk emitter should be readable stream, generator, ' + 85 'async generator or function returning an iterable object' 86 ); 87 }; 88 89 class ChunkParser { 90 constructor() { 91 this.value = undefined; 92 this.valueStack = null; 93 94 this.stack = new Array(100); 95 this.lastFlushDepth = 0; 96 this.flushDepth = 0; 97 this.stateString = false; 98 this.stateStringEscape = false; 99 this.pendingByteSeq = null; 100 this.pendingChunk = null; 101 this.chunkOffset = 0; 102 this.jsonParseOffset = 0; 103 } 104 105 parseAndAppend(fragment, wrap) { 106 // Append new entries or elements 107 if (this.stack[this.lastFlushDepth - 1] === STACK_OBJECT) { 108 if (wrap) { 109 this.jsonParseOffset--; 110 fragment = '{' + fragment + '}'; 111 } 112 113 Object.assign(this.valueStack.value, JSON.parse(fragment)); 114 } else { 115 if (wrap) { 116 this.jsonParseOffset--; 117 fragment = '[' + fragment + ']'; 118 } 119 120 append(this.valueStack.value, JSON.parse(fragment)); 121 } 122 } 123 124 prepareAddition(fragment) { 125 const { value } = this.valueStack; 126 const expectComma = Array.isArray(value) 127 ? value.length !== 0 128 : Object.keys(value).length !== 0; 129 130 if (expectComma) { 131 // Skip a comma at the beginning of fragment, otherwise it would 132 // fail to parse 133 if (fragment[0] === ',') { 134 this.jsonParseOffset++; 135 return fragment.slice(1); 136 } 137 138 // When value (an object or array) is not empty and a fragment 139 // doesn't start with a comma, a single valid fragment starting 140 // is a closing bracket. If it's not, a prefix is adding to fail 141 // parsing. Otherwise, the sequence of chunks can be successfully 142 // parsed, although it should not, e.g. ["[{}", "{}]"] 143 if (fragment[0] !== '}' && fragment[0] !== ']') { 144 this.jsonParseOffset -= 3; 145 return '[[]' + fragment; 146 } 147 } 148 149 return fragment; 150 } 151 152 flush(chunk, start, end) { 153 let fragment = chunk.slice(start, end); 154 155 // Save position correction an error in JSON.parse() if any 156 this.jsonParseOffset = this.chunkOffset + start; 157 158 // Prepend pending chunk if any 159 if (this.pendingChunk !== null) { 160 fragment = this.pendingChunk + fragment; 161 this.jsonParseOffset -= this.pendingChunk.length; 162 this.pendingChunk = null; 163 } 164 165 if (this.flushDepth === this.lastFlushDepth) { 166 // Depth didn't changed, so it's a root value or entry/element set 167 if (this.flushDepth > 0) { 168 this.parseAndAppend(this.prepareAddition(fragment), true); 169 } else { 170 // That's an entire value on a top level 171 this.value = JSON.parse(fragment); 172 this.valueStack = { 173 value: this.value, 174 prev: null 175 }; 176 } 177 } else if (this.flushDepth > this.lastFlushDepth) { 178 // Add missed closing brackets/parentheses 179 for (let i = this.flushDepth - 1; i >= this.lastFlushDepth; i--) { 180 fragment += this.stack[i] === STACK_OBJECT ? '}' : ']'; 181 } 182 183 if (this.lastFlushDepth === 0) { 184 // That's a root value 185 this.value = JSON.parse(fragment); 186 this.valueStack = { 187 value: this.value, 188 prev: null 189 }; 190 } else { 191 this.parseAndAppend(this.prepareAddition(fragment), true); 192 } 193 194 // Move down to the depths to the last object/array, which is current now 195 for (let i = this.lastFlushDepth || 1; i < this.flushDepth; i++) { 196 let value = this.valueStack.value; 197 198 if (this.stack[i - 1] === STACK_OBJECT) { 199 // find last entry 200 let key; 201 // eslint-disable-next-line curly 202 for (key in value); 203 value = value[key]; 204 } else { 205 // last element 206 value = value[value.length - 1]; 207 } 208 209 this.valueStack = { 210 value, 211 prev: this.valueStack 212 }; 213 } 214 } else /* this.flushDepth < this.lastFlushDepth */ { 215 fragment = this.prepareAddition(fragment); 216 217 // Add missed opening brackets/parentheses 218 for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) { 219 this.jsonParseOffset--; 220 fragment = (this.stack[i] === STACK_OBJECT ? '{' : '[') + fragment; 221 } 222 223 this.parseAndAppend(fragment, false); 224 225 for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) { 226 this.valueStack = this.valueStack.prev; 227 } 228 } 229 230 this.lastFlushDepth = this.flushDepth; 231 } 232 233 push(chunk) { 234 if (typeof chunk !== 'string') { 235 // Suppose chunk is Buffer or Uint8Array 236 237 // Prepend uncompleted byte sequence if any 238 if (this.pendingByteSeq !== null) { 239 const origRawChunk = chunk; 240 chunk = new Uint8Array(this.pendingByteSeq.length + origRawChunk.length); 241 chunk.set(this.pendingByteSeq); 242 chunk.set(origRawChunk, this.pendingByteSeq.length); 243 this.pendingByteSeq = null; 244 } 245 246 // In case Buffer/Uint8Array, an input is encoded in UTF8 247 // Seek for parts of uncompleted UTF8 symbol on the ending 248 // This makes sense only if we expect more chunks and last char is not multi-bytes 249 if (chunk[chunk.length - 1] > 127) { 250 for (let seqLength = 0; seqLength < chunk.length; seqLength++) { 251 const byte = chunk[chunk.length - 1 - seqLength]; 252 253 // 10xxxxxx - 2nd, 3rd or 4th byte 254 // 110xxxxx – first byte of 2-byte sequence 255 // 1110xxxx - first byte of 3-byte sequence 256 // 11110xxx - first byte of 4-byte sequence 257 if (byte >> 6 === 3) { 258 seqLength++; 259 260 // If the sequence is really incomplete, then preserve it 261 // for the future chunk and cut off it from the current chunk 262 if ((seqLength !== 4 && byte >> 3 === 0b11110) || 263 (seqLength !== 3 && byte >> 4 === 0b1110) || 264 (seqLength !== 2 && byte >> 5 === 0b110)) { 265 this.pendingByteSeq = chunk.slice(chunk.length - seqLength); 266 chunk = chunk.slice(0, -seqLength); 267 } 268 269 break; 270 } 271 } 272 } 273 274 // Convert chunk to a string, since single decode per chunk 275 // is much effective than decode multiple small substrings 276 chunk = decoder.decode(chunk); 277 } 278 279 const chunkLength = chunk.length; 280 let lastFlushPoint = 0; 281 let flushPoint = 0; 282 283 // Main scan loop 284 scan: for (let i = 0; i < chunkLength; i++) { 285 if (this.stateString) { 286 for (; i < chunkLength; i++) { 287 if (this.stateStringEscape) { 288 this.stateStringEscape = false; 289 } else { 290 switch (chunk.charCodeAt(i)) { 291 case 0x22: /* " */ 292 this.stateString = false; 293 continue scan; 294 295 case 0x5C: /* \ */ 296 this.stateStringEscape = true; 297 } 298 } 299 } 300 301 break; 302 } 303 304 switch (chunk.charCodeAt(i)) { 305 case 0x22: /* " */ 306 this.stateString = true; 307 this.stateStringEscape = false; 308 break; 309 310 case 0x2C: /* , */ 311 flushPoint = i; 312 break; 313 314 case 0x7B: /* { */ 315 // Open an object 316 flushPoint = i + 1; 317 this.stack[this.flushDepth++] = STACK_OBJECT; 318 break; 319 320 case 0x5B: /* [ */ 321 // Open an array 322 flushPoint = i + 1; 323 this.stack[this.flushDepth++] = STACK_ARRAY; 324 break; 325 326 case 0x5D: /* ] */ 327 case 0x7D: /* } */ 328 // Close an object or array 329 flushPoint = i + 1; 330 this.flushDepth--; 331 332 if (this.flushDepth < this.lastFlushDepth) { 333 this.flush(chunk, lastFlushPoint, flushPoint); 334 lastFlushPoint = flushPoint; 335 } 336 337 break; 338 339 case 0x09: /* \t */ 340 case 0x0A: /* \n */ 341 case 0x0D: /* \r */ 342 case 0x20: /* space */ 343 // Move points forward when they points on current position and it's a whitespace 344 if (lastFlushPoint === i) { 345 lastFlushPoint++; 346 } 347 348 if (flushPoint === i) { 349 flushPoint++; 350 } 351 352 break; 353 } 354 } 355 356 if (flushPoint > lastFlushPoint) { 357 this.flush(chunk, lastFlushPoint, flushPoint); 358 } 359 360 // Produce pendingChunk if something left 361 if (flushPoint < chunkLength) { 362 if (this.pendingChunk !== null) { 363 // When there is already a pending chunk then no flush happened, 364 // appending entire chunk to pending one 365 this.pendingChunk += chunk; 366 } else { 367 // Create a pending chunk, it will start with non-whitespace since 368 // flushPoint was moved forward away from whitespaces on scan 369 this.pendingChunk = chunk.slice(flushPoint, chunkLength); 370 } 371 } 372 373 this.chunkOffset += chunkLength; 374 } 375 376 finish() { 377 if (this.pendingChunk !== null) { 378 this.flush('', 0, 0); 379 this.pendingChunk = null; 380 } 381 382 return this.value; 383 } 384 };