time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

main.js (4058B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2020 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 'use strict';
     20 
     21 // MODULES //
     22 
     23 var isBoolean = require( '@stdlib/assert/is-boolean' ).isPrimitive;
     24 var isString = require( '@stdlib/assert/is-string' ).isPrimitive;
     25 var isNonNegativeInteger = require( '@stdlib/assert/is-nonnegative-integer' ).isPrimitive;
     26 var format = require( './../../format' );
     27 
     28 
     29 // VARIABLES //
     30 
     31 // Factors for converting individual surrogates
     32 var Ox10000 = 0x10000|0; // 65536
     33 var Ox400 = 0x400|0; // 1024
     34 
     35 // Range for a high surrogate
     36 var OxD800 = 0xD800|0; // 55296
     37 var OxDBFF = 0xDBFF|0; // 56319
     38 
     39 // Range for a low surrogate
     40 var OxDC00 = 0xDC00|0; // 56320
     41 var OxDFFF = 0xDFFF|0; // 57343
     42 
     43 
     44 // MAIN //
     45 
     46 /**
     47 * Returns a Unicode code point from a string at a specified position.
     48 *
     49 * ## Notes
     50 *
     51 * -   UTF-16 encoding uses one 16-bit unit for non-surrogates (U+0000 to U+D7FF and U+E000 to U+FFFF).
     52 * -   UTF-16 encoding uses two 16-bit units (surrogate pairs) for U+10000 to U+10FFFF and encodes U+10000-U+10FFFF by subtracting 0x10000 from the code point, expressing the result as a 20-bit binary, and splitting the 20 bits of 0x0-0xFFFFF as upper and lower 10-bits. The respective 10-bits are stored in two 16-bit words: a high and a low surrogate.
     53 *
     54 *
     55 * @param {string} str - input string
     56 * @param {NonNegativeInteger} idx - position
     57 * @param {boolean} [backward=false] - backward iteration for low surrogates
     58 * @throws {TypeError} first argument must be a string
     59 * @throws {TypeError} second argument must be a number having a nonnegative integer
     60 * @throws {TypeError} third argument must be a boolean
     61 * @throws {RangeError} position must be a valid index in string
     62 * @returns {NonNegativeInteger} code point
     63 *
     64 * @example
     65 * var out = codePointAt( 'last man standing', 4 );
     66 * // returns 32
     67 *
     68 * @example
     69 * var out = codePointAt( 'presidential election', 8, true );
     70 * // returns 116
     71 *
     72 * @example
     73 * var out = codePointAt( 'अनुच्छेद', 2 );
     74 * // returns 2369
     75 *
     76 * @example
     77 * var out = codePointAt( '🌷', 1, true );
     78 * // returns 127799
     79 */
     80 function codePointAt( str, idx, backward ) {
     81 	var code;
     82 	var FLG;
     83 	var low;
     84 	var hi;
     85 
     86 	if ( !isString( str ) ) {
     87 		throw new TypeError( format( 'invalid argument. Must provide a string. Value: `%s`.', str ) );
     88 	}
     89 	if ( !isNonNegativeInteger( idx ) ) {
     90 		throw new TypeError( format( 'invalid argument. Must provide a valid position (i.e., a nonnegative integer). Value: `%s`.', idx ) );
     91 	}
     92 	if ( idx >= str.length ) {
     93 		throw new RangeError( format( 'invalid argument. Must provide a valid position (i.e., within string bounds). Value: `%u`.', idx ) );
     94 	}
     95 	if ( arguments.length > 2 ) {
     96 		if ( !isBoolean( backward ) ) {
     97 			throw new TypeError( format( 'invalid argument. Third argument must be a boolean. Value: `%s`.', backward ) );
     98 		}
     99 		FLG = backward;
    100 	} else {
    101 		FLG = false;
    102 	}
    103 	code = str.charCodeAt( idx );
    104 
    105 	// High surrogate
    106 	if ( code >= OxD800 && code <= OxDBFF && idx < str.length - 1 ) {
    107 		hi = code;
    108 		low = str.charCodeAt( idx+1 );
    109 		if ( OxDC00 <= low && low <= OxDFFF ) {
    110 			return ( ( hi - OxD800 ) * Ox400 ) + ( low - OxDC00 ) + Ox10000;
    111 		}
    112 		return hi;
    113 	}
    114 	// Low surrogate - support only if backward iteration is desired
    115 	if ( FLG ) {
    116 		if ( code >= OxDC00 && code <= OxDFFF && idx >= 1 ) {
    117 			hi = str.charCodeAt( idx-1 );
    118 			low = code;
    119 			if ( OxD800 <= hi && hi <= OxDBFF ) {
    120 				return ( ( hi - OxD800 ) * Ox400 ) + ( low - OxDC00 ) + Ox10000;
    121 			}
    122 			return low;
    123 		}
    124 	}
    125 	return code;
    126 }
    127 
    128 
    129 // EXPORTS //
    130 
    131 module.exports = codePointAt;