You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.1 KiB
99 lines
3.1 KiB
"use strict" |
|
|
|
var defaults = require('defaults') |
|
var combining = require('./combining') |
|
|
|
// Widths reported when the caller does not configure anything:
// `nul` is used for U+0000, `control` for C0/C1 control characters and DEL.
var DEFAULTS = {
  nul: 0,
  control: 0
}
|
|
|
module.exports = function wcwidth(str) { |
|
return wcswidth(str, DEFAULTS) |
|
} |
|
|
|
/**
 * Build a width function bound to caller-supplied options.
 *
 * @param {Object} [opts] - Option overrides. A falsy value is replaced by
 *   `{}` before being passed to defaults() together with DEFAULTS.
 *   NOTE(review): defaults() presumably fills in the keys missing from
 *   `opts` (and may do so in place) - confirm against the `defaults` package.
 * @returns {function(*): number} Function that returns wcswidth(str, opts)
 *   for the resolved options.
 */
module.exports.config = function(opts) {
  opts = defaults(opts || {}, DEFAULTS)

  return function wcwidth(str) {
    return wcswidth(str, opts)
  }
}
|
|
|
/*
 * The following functions define the column width of an ISO 10646
 * character as follows:
 *  - The null character (U+0000) has the column width given by the `nul`
 *    option (0 by default).
 *  - Other C0/C1 control characters and DEL have the column width given by
 *    the `control` option (0 by default); a negative value makes the width
 *    of any string containing such a character -1.
 *  - Non-spacing and enclosing combining characters (general category
 *    code Mn or Me in the Unicode database) have a column width of 0.
 *  - SOFT HYPHEN (U+00AD) has a column width of 1.
 *  - Other format characters (general category code Cf in the Unicode
 *    database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
 *  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
 *    have a column width of 0.
 *  - Spacing characters in the East Asian Wide (W) or East Asian
 *    Full-width (F) category as defined in Unicode Technical Report #11
 *    have a column width of 2.
 *  - All remaining characters (including all printable ISO 8859-1 and
 *    WGL4 characters, Unicode control characters, etc.) have a column
 *    width of 1.
 * This implementation assumes that characters are encoded in ISO 10646.
 */
|
|
|
/**
 * Column width of a string, or of a single code when given a non-string.
 *
 * Strings are walked one UTF-16 code unit at a time (charCodeAt), so a
 * character outside the Basic Multilingual Plane is measured as its two
 * surrogate halves rather than as one code point.
 *
 * @param {string|number} str - String to measure; any non-string value is
 *   forwarded unchanged to wcwidth() as a single code.
 * @param {{nul: number, control: number}} opts - Options forwarded to
 *   wcwidth() for every unit.
 * @returns {number} Sum of the per-unit widths, or -1 as soon as any unit
 *   reports a negative width.
 */
function wcswidth(str, opts) {
  if (typeof str !== 'string') return wcwidth(str, opts)

  var total = 0
  for (var i = 0; i < str.length; i++) {
    var width = wcwidth(str.charCodeAt(i), opts)
    // A negative width marks the whole string as unmeasurable.
    if (width < 0) return -1
    total += width
  }

  return total
}
|
|
|
/**
 * Column width of a single character code.
 *
 * @param {number} ucs - Character code to measure.
 * @param {{nul: number, control: number}} opts - Widths to report for the
 *   null character (`nul`) and for control characters (`control`).
 * @returns {number} opts.nul for 0; opts.control for codes below 32 and for
 *   0x7f-0x9f; 0 when bisearch() reports the code as non-spacing; 2 for the
 *   wide ranges listed in the body; 1 otherwise.
 */
function wcwidth(ucs, opts) {
  // test for 8-bit control characters
  if (ucs === 0) return opts.nul
  if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) return opts.control

  // binary search in table of non-spacing characters
  if (bisearch(ucs)) return 0

  // if we arrive here, ucs is not a combining or C0/C1 control character
  var isWide = ucs >= 0x1100 && (
    ucs <= 0x115f ||                        // Hangul Jamo init. consonants
    ucs === 0x2329 || ucs === 0x232a ||
    (ucs >= 0x2e80 && ucs <= 0xa4cf &&
      ucs !== 0x303f) ||                    // CJK ... Yi
    (ucs >= 0xac00 && ucs <= 0xd7a3) ||     // Hangul Syllables
    (ucs >= 0xf900 && ucs <= 0xfaff) ||     // CJK Compatibility Ideographs
    (ucs >= 0xfe10 && ucs <= 0xfe19) ||     // Vertical forms
    (ucs >= 0xfe30 && ucs <= 0xfe6f) ||     // CJK Compatibility Forms
    (ucs >= 0xff00 && ucs <= 0xff60) ||     // Fullwidth Forms
    (ucs >= 0xffe0 && ucs <= 0xffe6) ||
    (ucs >= 0x20000 && ucs <= 0x2fffd) ||
    (ucs >= 0x30000 && ucs <= 0x3fffd)
  )

  return isWide ? 2 : 1
}
|
|
|
/**
 * Binary search of the `combining` table for a character code.
 *
 * The search treats each entry of `combining` as an inclusive
 * [first, last] pair and relies on the entries being in ascending order.
 *
 * @param {number} ucs - Character code to look up.
 * @returns {boolean} true when some entry satisfies first <= ucs <= last,
 *   false otherwise.
 */
function bisearch(ucs) {
  var min = 0
  var max = combining.length - 1
  var mid

  // Quick reject: outside the span covered by the whole table.
  if (ucs < combining[0][0] || ucs > combining[max][1]) return false

  while (max >= min) {
    mid = Math.floor((min + max) / 2)
    if (ucs > combining[mid][1]) min = mid + 1
    else if (ucs < combining[mid][0]) max = mid - 1
    else return true
  }

  return false
}
|
|
|