time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

_unicodeSize.js (1642B)


      /**
       * Used to compose unicode character classes.
       *
       * Each value is regex *source text* meant to be placed inside `[...]`; the
       * doubled backslashes survive into the pattern as `\uXXXX` escapes. All of
       * them carry the `rs` ("regex source") prefix; `re` is kept for compiled
       * `RegExp` objects.
       */
      var rsAstralRange = '\\ud800-\\udfff',          // all UTF-16 surrogate code units
          rsComboMarksRange = '\\u0300-\\u036f',      // Combining Diacritical Marks
          rsComboHalfMarksRange = '\\ufe20-\\ufe2f',  // Combining Half Marks
          rsComboSymbolsRange = '\\u20d0-\\u20ff',    // Combining Diacritical Marks for Symbols
          rsComboRange = rsComboMarksRange + rsComboHalfMarksRange + rsComboSymbolsRange,
          rsVarRange = '\\ufe0e\\ufe0f';              // variation selectors 15 and 16
      8 
      /**
       * Used to compose unicode capture groups.
       *
       * Regex source fragments that each describe one kind of unit. The literal
       * fragments come first; the ones built from the range strings follow.
       */
      // Zero-width joiner (U+200D).
      var rsZWJ = '\\u200d';
      // A high surrogate followed by a low surrogate.
      var rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]';
      // Two consecutive regional indicator symbols (U+1F1E6-U+1F1FF).
      var rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}';
      // One of the modifiers U+1F3FB-U+1F3FF, written as a surrogate pair.
      var rsFitz = '\\ud83c[\\udffb-\\udfff]';
      // Any single surrogate code unit, and its complement.
      var rsAstral = '[' + rsAstralRange + ']';
      var rsNonAstral = '[^' + rsAstralRange + ']';
      // Any single combining mark from the combo ranges.
      var rsCombo = '[' + rsComboRange + ']';
      // A modifier is either a combining mark or an `rsFitz` modifier.
      var rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')';
     18 
     /**
      * Used to compose unicode regexes.
      *
      * All values are regex source strings, hence the uniform `rs` prefix.
      */
     var rsOptMod = rsModifier + '?',                // optional trailing modifier
         rsOptVar = '[' + rsVarRange + ']?',         // optional variation selector
         // Zero or more ZWJ-joined continuations, each with its own optional
         // variation selector and modifier.
         rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + rsOptMod + ')*',
         // Everything that may follow a base symbol and still belong to it.
         rsSeq = rsOptVar + rsOptMod + rsOptJoin,
         // One base symbol; alternatives are tried left to right.
         rsSymbol = '(?:' + [rsNonAstral + rsCombo + '?', rsCombo, rsRegional, rsSurrPair, rsAstral].join('|') + ')';
     25 
     /**
      * Used to match [string symbols](https://mathiasbynens.be/notes/javascript-unicode).
      *
      * Two alternatives, tried in order: an `rsFitz` modifier that is directly
      * followed by another one, or a base symbol plus its trailing sequence.
      * The `g` flag makes the regex stateful through `lastIndex`.
      */
     var reUnicode = new RegExp(
       [rsFitz + '(?=' + rsFitz + ')', rsSymbol + rsSeq].join('|'),
       'g'
     );
     28 
     /**
      * Gets the size of a Unicode `string` by counting how many times
      * `reUnicode` matches in it.
      *
      * @private
      * @param {string} string The string to inspect.
      * @returns {number} Returns the string size.
      */
     function unicodeSize(string) {
       var size = 0;
       // `reUnicode` is a shared global regex, so rewind it before scanning.
       reUnicode.lastIndex = 0;
       // Each successful `test` advances `lastIndex` past one match.
       while (reUnicode.test(string)) {
         size += 1;
       }
       return size;
     }

     module.exports = unicodeSize;