time-to-botec

Benchmark sampling in different programming languages
Log | Files | Refs | README

main.js (3595B)


      1 /**
      2 * @license Apache-2.0
      3 *
      4 * Copyright (c) 2018 The Stdlib Authors.
      5 *
      6 * Licensed under the Apache License, Version 2.0 (the "License");
      7 * you may not use this file except in compliance with the License.
      8 * You may obtain a copy of the License at
      9 *
     10 *    http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing, software
     13 * distributed under the License is distributed on an "AS IS" BASIS,
     14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 * See the License for the specific language governing permissions and
     16 * limitations under the License.
     17 */
     18 
     19 'use strict';
     20 
     21 // MODULES //
     22 
     23 var isNonNegativeInteger = require( '@stdlib/assert/is-nonnegative-integer' ).isPrimitive;
     24 var isCollection = require( '@stdlib/assert/is-collection' );
     25 var format = require( './../../format' );
     26 var UNICODE_MAX = require( '@stdlib/constants/unicode/max' );
     27 var UNICODE_MAX_BMP = require( '@stdlib/constants/unicode/max-bmp' );
     28 
     29 
     30 // VARIABLES //
     31 
     32 var fromCharCode = String.fromCharCode;
     33 
     34 // Factor to rescale a code point from a supplementary plane:
     35 var Ox10000 = 0x10000|0; // 65536
     36 
     37 // Factor added to obtain a high surrogate:
     38 var OxD800 = 0xD800|0; // 55296
     39 
     40 // Factor added to obtain a low surrogate:
     41 var OxDC00 = 0xDC00|0; // 56320
     42 
     43 // 10-bit mask: 2^10-1 = 1023 => 0x3ff => 00000000 00000000 00000011 11111111
     44 var Ox3FF = 1023|0;
     45 
     46 
     47 // MAIN //
     48 
     49 /**
     50 * Creates a string from a sequence of Unicode code points.
     51 *
     52 * ## Notes
     53 *
     54 * -   UTF-16 encoding uses one 16-bit unit for non-surrogates (U+0000 to U+D7FF and U+E000 to U+FFFF).
     55 * -   UTF-16 encoding uses two 16-bit units (surrogate pairs) for U+10000 to U+10FFFF and encodes U+10000-U+10FFFF by subtracting 0x10000 from the code point, expressing the result as a 20-bit binary, and splitting the 20 bits of 0x0-0xFFFFF as upper and lower 10-bits. The respective 10-bits are stored in two 16-bit words: a high and a low surrogate.
     56 *
     57 *
     58 * @param {...NonNegativeInteger} args - sequence of code points
     59 * @throws {Error} must provide either an array-like object of code points or one or more code points as separate arguments
     60 * @throws {TypeError} a code point must be a nonnegative integer
     61 * @throws {RangeError} must provide a valid Unicode code point
     62 * @returns {string} created string
     63 *
     64 * @example
     65 * var str = fromCodePoint( 9731 );
     66 * // returns '☃'
     67 */
     68 function fromCodePoint( args ) {
     69 	var len;
     70 	var str;
     71 	var arr;
     72 	var low;
     73 	var hi;
     74 	var pt;
     75 	var i;
     76 
     77 	len = arguments.length;
     78 	if ( len === 1 && isCollection( args ) ) {
     79 		arr = arguments[ 0 ];
     80 		len = arr.length;
     81 	} else {
     82 		arr = [];
     83 		for ( i = 0; i < len; i++ ) {
     84 			arr.push( arguments[ i ] );
     85 		}
     86 	}
     87 	if ( len === 0 ) {
     88 		throw new Error( 'insufficient arguments. Must provide either an array of code points or one or more code points as separate arguments.' );
     89 	}
     90 	str = '';
     91 	for ( i = 0; i < len; i++ ) {
     92 		pt = arr[ i ];
     93 		if ( !isNonNegativeInteger( pt ) ) {
     94 			throw new TypeError( format( 'invalid argument. Must provide valid code points (i.e., nonnegative integers). Value: `%s`.', pt ) );
     95 		}
     96 		if ( pt > UNICODE_MAX ) {
     97 			throw new RangeError( format( 'invalid argument. Must provide a valid code point (i.e., cannot exceed %u). Value: `%s`.', UNICODE_MAX, pt ) );
     98 		}
     99 		if ( pt <= UNICODE_MAX_BMP ) {
    100 			str += fromCharCode( pt );
    101 		} else {
    102 			// Code point from a supplementary plane. Split into two 16-bit code units (surrogate pair).
    103 			pt -= Ox10000;
    104 			hi = (pt >> 10) + OxD800;
    105 			low = (pt & Ox3FF) + OxDC00;
    106 			str += fromCharCode( hi, low );
    107 		}
    108 	}
    109 	return str;
    110 }
    111 
    112 
    113 // EXPORTS //
    114 
    115 module.exports = fromCodePoint;