simple-squiggle

A restricted subset of Squiggle
Log | Files | Refs | README

parse-chunked.js (13331B)


      1 const { isReadableStream } = require('./utils');
      2 const TextDecoder = require('./text-decoder');
      3 
      4 const STACK_OBJECT = 1; // ChunkParser.stack marker: a '{' was opened at this depth
      5 const STACK_ARRAY = 2; // ChunkParser.stack marker: a '[' was opened at this depth
      6 const decoder = new TextDecoder(); // shared decoder used by ChunkParser.push() to turn binary chunks into strings
      7 
      8 function isObject(value) {
      9     return value !== null && typeof value === 'object';
     10 }
     11 
     12 function adjustPosition(error, parser) {
     13     if (error.name === 'SyntaxError' && parser.jsonParseOffset) {
     14         error.message = error.message.replace(/at position (\d+)/, (_, pos) =>
     15             'at position ' + (Number(pos) + parser.jsonParseOffset)
     16         );
     17     }
     18 
     19     return error;
     20 }
     21 
     22 function append(array, elements) {
     23     // Note: Avoid to use array.push(...elements) since it may lead to
     24     // "RangeError: Maximum call stack size exceeded" for a long arrays
     25     const initialLength = array.length;
     26     array.length += elements.length;
     27 
     28     for (let i = 0; i < elements.length; i++) {
     29         array[initialLength + i] = elements[i];
     30     }
     31 }
     32 
     33 module.exports = function(chunkEmitter) {
     34     let parser = new ChunkParser();
     35 
     36     if (isObject(chunkEmitter) && isReadableStream(chunkEmitter)) {
     37         return new Promise((resolve, reject) => {
     38             chunkEmitter
     39                 .on('data', chunk => {
     40                     try {
     41                         parser.push(chunk);
     42                     } catch (e) {
     43                         reject(adjustPosition(e, parser));
     44                         parser = null;
     45                     }
     46                 })
     47                 .on('error', (e) => {
     48                     parser = null;
     49                     reject(e);
     50                 })
     51                 .on('end', () => {
     52                     try {
     53                         resolve(parser.finish());
     54                     } catch (e) {
     55                         reject(adjustPosition(e, parser));
     56                     } finally {
     57                         parser = null;
     58                     }
     59                 });
     60         });
     61     }
     62 
     63     if (typeof chunkEmitter === 'function') {
     64         const iterator = chunkEmitter();
     65 
     66         if (isObject(iterator) && (Symbol.iterator in iterator || Symbol.asyncIterator in iterator)) {
     67             return new Promise(async (resolve, reject) => {
     68                 try {
     69                     for await (const chunk of iterator) {
     70                         parser.push(chunk);
     71                     }
     72 
     73                     resolve(parser.finish());
     74                 } catch (e) {
     75                     reject(adjustPosition(e, parser));
     76                 } finally {
     77                     parser = null;
     78                 }
     79             });
     80         }
     81     }
     82 
     83     throw new Error(
     84         'Chunk emitter should be readable stream, generator, ' +
     85         'async generator or function returning an iterable object'
     86     );
     87 };
     88 
     89 class ChunkParser {
     90     constructor() {
     91         this.value = undefined;
     92         this.valueStack = null;
     93 
     94         this.stack = new Array(100);
     95         this.lastFlushDepth = 0;
     96         this.flushDepth = 0;
     97         this.stateString = false;
     98         this.stateStringEscape = false;
     99         this.pendingByteSeq = null;
    100         this.pendingChunk = null;
    101         this.chunkOffset = 0;
    102         this.jsonParseOffset = 0;
    103     }
    104 
    105     parseAndAppend(fragment, wrap) {
    106         // Append new entries or elements
    107         if (this.stack[this.lastFlushDepth - 1] === STACK_OBJECT) {
    108             if (wrap) {
    109                 this.jsonParseOffset--;
    110                 fragment = '{' + fragment + '}';
    111             }
    112 
    113             Object.assign(this.valueStack.value, JSON.parse(fragment));
    114         } else {
    115             if (wrap) {
    116                 this.jsonParseOffset--;
    117                 fragment = '[' + fragment + ']';
    118             }
    119 
    120             append(this.valueStack.value, JSON.parse(fragment));
    121         }
    122     }
    123 
    124     prepareAddition(fragment) {
    125         const { value } = this.valueStack;
    126         const expectComma = Array.isArray(value)
    127             ? value.length !== 0
    128             : Object.keys(value).length !== 0;
    129 
    130         if (expectComma) {
    131             // Skip a comma at the beginning of fragment, otherwise it would
    132             // fail to parse
    133             if (fragment[0] === ',') {
    134                 this.jsonParseOffset++;
    135                 return fragment.slice(1);
    136             }
    137 
    138             // When value (an object or array) is not empty and a fragment
    139             // doesn't start with a comma, a single valid fragment starting
    140             // is a closing bracket. If it's not, a prefix is adding to fail
    141             // parsing. Otherwise, the sequence of chunks can be successfully
    142             // parsed, although it should not, e.g. ["[{}", "{}]"]
    143             if (fragment[0] !== '}' && fragment[0] !== ']') {
    144                 this.jsonParseOffset -= 3;
    145                 return '[[]' + fragment;
    146             }
    147         }
    148 
    149         return fragment;
    150     }
    151 
    152     flush(chunk, start, end) {
    153         let fragment = chunk.slice(start, end);
    154 
    155         // Save position correction an error in JSON.parse() if any
    156         this.jsonParseOffset = this.chunkOffset + start;
    157 
    158         // Prepend pending chunk if any
    159         if (this.pendingChunk !== null) {
    160             fragment = this.pendingChunk + fragment;
    161             this.jsonParseOffset -= this.pendingChunk.length;
    162             this.pendingChunk = null;
    163         }
    164 
    165         if (this.flushDepth === this.lastFlushDepth) {
    166             // Depth didn't changed, so it's a root value or entry/element set
    167             if (this.flushDepth > 0) {
    168                 this.parseAndAppend(this.prepareAddition(fragment), true);
    169             } else {
    170                 // That's an entire value on a top level
    171                 this.value = JSON.parse(fragment);
    172                 this.valueStack = {
    173                     value: this.value,
    174                     prev: null
    175                 };
    176             }
    177         } else if (this.flushDepth > this.lastFlushDepth) {
    178             // Add missed closing brackets/parentheses
    179             for (let i = this.flushDepth - 1; i >= this.lastFlushDepth; i--) {
    180                 fragment += this.stack[i] === STACK_OBJECT ? '}' : ']';
    181             }
    182 
    183             if (this.lastFlushDepth === 0) {
    184                 // That's a root value
    185                 this.value = JSON.parse(fragment);
    186                 this.valueStack = {
    187                     value: this.value,
    188                     prev: null
    189                 };
    190             } else {
    191                 this.parseAndAppend(this.prepareAddition(fragment), true);
    192             }
    193 
    194             // Move down to the depths to the last object/array, which is current now
    195             for (let i = this.lastFlushDepth || 1; i < this.flushDepth; i++) {
    196                 let value = this.valueStack.value;
    197 
    198                 if (this.stack[i - 1] === STACK_OBJECT) {
    199                     // find last entry
    200                     let key;
    201                     // eslint-disable-next-line curly
    202                     for (key in value);
    203                     value = value[key];
    204                 } else {
    205                     // last element
    206                     value = value[value.length - 1];
    207                 }
    208 
    209                 this.valueStack = {
    210                     value,
    211                     prev: this.valueStack
    212                 };
    213             }
    214         } else /* this.flushDepth < this.lastFlushDepth */ {
    215             fragment = this.prepareAddition(fragment);
    216 
    217             // Add missed opening brackets/parentheses
    218             for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) {
    219                 this.jsonParseOffset--;
    220                 fragment = (this.stack[i] === STACK_OBJECT ? '{' : '[') + fragment;
    221             }
    222 
    223             this.parseAndAppend(fragment, false);
    224 
    225             for (let i = this.lastFlushDepth - 1; i >= this.flushDepth; i--) {
    226                 this.valueStack = this.valueStack.prev;
    227             }
    228         }
    229 
    230         this.lastFlushDepth = this.flushDepth;
    231     }
    232 
    233     push(chunk) {
    234         if (typeof chunk !== 'string') {
    235             // Suppose chunk is Buffer or Uint8Array
    236 
    237             // Prepend uncompleted byte sequence if any
    238             if (this.pendingByteSeq !== null) {
    239                 const origRawChunk = chunk;
    240                 chunk = new Uint8Array(this.pendingByteSeq.length + origRawChunk.length);
    241                 chunk.set(this.pendingByteSeq);
    242                 chunk.set(origRawChunk, this.pendingByteSeq.length);
    243                 this.pendingByteSeq = null;
    244             }
    245 
    246             // In case Buffer/Uint8Array, an input is encoded in UTF8
    247             // Seek for parts of uncompleted UTF8 symbol on the ending
    248             // This makes sense only if we expect more chunks and last char is not multi-bytes
    249             if (chunk[chunk.length - 1] > 127) {
    250                 for (let seqLength = 0; seqLength < chunk.length; seqLength++) {
    251                     const byte = chunk[chunk.length - 1 - seqLength];
    252 
    253                     // 10xxxxxx - 2nd, 3rd or 4th byte
    254                     // 110xxxxx – first byte of 2-byte sequence
    255                     // 1110xxxx - first byte of 3-byte sequence
    256                     // 11110xxx - first byte of 4-byte sequence
    257                     if (byte >> 6 === 3) {
    258                         seqLength++;
    259 
    260                         // If the sequence is really incomplete, then preserve it
    261                         // for the future chunk and cut off it from the current chunk
    262                         if ((seqLength !== 4 && byte >> 3 === 0b11110) ||
    263                             (seqLength !== 3 && byte >> 4 === 0b1110) ||
    264                             (seqLength !== 2 && byte >> 5 === 0b110)) {
    265                             this.pendingByteSeq = chunk.slice(chunk.length - seqLength);
    266                             chunk = chunk.slice(0, -seqLength);
    267                         }
    268 
    269                         break;
    270                     }
    271                 }
    272             }
    273 
    274             // Convert chunk to a string, since single decode per chunk
    275             // is much effective than decode multiple small substrings
    276             chunk = decoder.decode(chunk);
    277         }
    278 
    279         const chunkLength = chunk.length;
    280         let lastFlushPoint = 0;
    281         let flushPoint = 0;
    282 
    283         // Main scan loop
    284         scan: for (let i = 0; i < chunkLength; i++) {
    285             if (this.stateString) {
    286                 for (; i < chunkLength; i++) {
    287                     if (this.stateStringEscape) {
    288                         this.stateStringEscape = false;
    289                     } else {
    290                         switch (chunk.charCodeAt(i)) {
    291                             case 0x22: /* " */
    292                                 this.stateString = false;
    293                                 continue scan;
    294 
    295                             case 0x5C: /* \ */
    296                                 this.stateStringEscape = true;
    297                         }
    298                     }
    299                 }
    300 
    301                 break;
    302             }
    303 
    304             switch (chunk.charCodeAt(i)) {
    305                 case 0x22: /* " */
    306                     this.stateString = true;
    307                     this.stateStringEscape = false;
    308                     break;
    309 
    310                 case 0x2C: /* , */
    311                     flushPoint = i;
    312                     break;
    313 
    314                 case 0x7B: /* { */
    315                     // Open an object
    316                     flushPoint = i + 1;
    317                     this.stack[this.flushDepth++] = STACK_OBJECT;
    318                     break;
    319 
    320                 case 0x5B: /* [ */
    321                     // Open an array
    322                     flushPoint = i + 1;
    323                     this.stack[this.flushDepth++] = STACK_ARRAY;
    324                     break;
    325 
    326                 case 0x5D: /* ] */
    327                 case 0x7D: /* } */
    328                     // Close an object or array
    329                     flushPoint = i + 1;
    330                     this.flushDepth--;
    331 
    332                     if (this.flushDepth < this.lastFlushDepth) {
    333                         this.flush(chunk, lastFlushPoint, flushPoint);
    334                         lastFlushPoint = flushPoint;
    335                     }
    336 
    337                     break;
    338 
    339                 case 0x09: /* \t */
    340                 case 0x0A: /* \n */
    341                 case 0x0D: /* \r */
    342                 case 0x20: /* space */
    343                     // Move points forward when they points on current position and it's a whitespace
    344                     if (lastFlushPoint === i) {
    345                         lastFlushPoint++;
    346                     }
    347 
    348                     if (flushPoint === i) {
    349                         flushPoint++;
    350                     }
    351 
    352                     break;
    353             }
    354         }
    355 
    356         if (flushPoint > lastFlushPoint) {
    357             this.flush(chunk, lastFlushPoint, flushPoint);
    358         }
    359 
    360         // Produce pendingChunk if something left
    361         if (flushPoint < chunkLength) {
    362             if (this.pendingChunk !== null) {
    363                 // When there is already a pending chunk then no flush happened,
    364                 // appending entire chunk to pending one
    365                 this.pendingChunk += chunk;
    366             } else {
    367                 // Create a pending chunk, it will start with non-whitespace since
    368                 // flushPoint was moved forward away from whitespaces on scan
    369                 this.pendingChunk = chunk.slice(flushPoint, chunkLength);
    370             }
    371         }
    372 
    373         this.chunkOffset += chunkLength;
    374     }
    375 
    376     finish() {
    377         if (this.pendingChunk !== null) {
    378             this.flush('', 0, 0);
    379             this.pendingChunk = null;
    380         }
    381 
    382         return this.value;
    383     }
    384 };