初始化

This commit is contained in:
2023-12-29 00:08:10 +08:00
commit 5ed0fc646f
512 changed files with 54378 additions and 0 deletions

View File

@@ -0,0 +1,393 @@
// This is free and unencumbered software released into the public domain.
// See LICENSE.md for more information.
var THE_ENCODING = ['utf-8'];
var LEGACY_ENCODINGS = [
'ibm866', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-10',
'iso-8859-13', 'iso-8859-14', 'iso-8859-15', 'iso-8859-16', 'koi8-r',
'koi8-u', 'macintosh', 'windows-874', 'windows-1250', 'windows-1251',
'windows-1252', 'windows-1253', 'windows-1254', 'windows-1255',
'windows-1256', 'windows-1257', 'windows-1258', 'x-mac-cyrillic',
'gbk', 'gb18030', 'big5', 'euc-jp', 'iso-2022-jp', 'shift_jis',
'euc-kr', 'utf-16le', 'utf-16be'
];
var ASCII_SUPERSETS = THE_ENCODING.concat(LEGACY_ENCODINGS)
.filter(function(e) {
return e !== 'utf-16le' && e !== 'utf-16be';
});
// Miscellaneous tests
test(function() {
assert_false(/\[native code\]/.test(String(TextDecoder)),
'Native implementation present - polyfill not tested.');
}, 'TextDecoder Polyfill (will fail if natively supported)');
test(function() {
assert_false(/\[native code\]/.test(String(TextEncoder)),
'Native implementation present - polyfill not tested.');
}, 'TextEncoder Polyfill (will fail if natively supported)');
test(function() {
assert_true('encoding' in new TextEncoder());
assert_equals(new TextEncoder().encoding, 'utf-8');
assert_true('encoding' in new TextDecoder());
assert_equals(new TextDecoder().encoding, 'utf-8');
assert_equals(new TextDecoder('utf-16le').encoding, 'utf-16le');
assert_true('fatal' in new TextDecoder());
assert_false(new TextDecoder('utf-8').fatal);
assert_true(new TextDecoder('utf-8', {fatal: true}).fatal);
assert_true('ignoreBOM' in new TextDecoder());
assert_false(new TextDecoder('utf-8').ignoreBOM);
assert_true(new TextDecoder('utf-8', {ignoreBOM: true}).ignoreBOM);
}, 'Attributes');
test(function() {
var badStrings = [
{ input: '\ud800', expected: '\ufffd' }, // Surrogate half
{ input: '\udc00', expected: '\ufffd' }, // Surrogate half
{ input: 'abc\ud800def', expected: 'abc\ufffddef' }, // Surrogate half
{ input: 'abc\udc00def', expected: 'abc\ufffddef' }, // Surrogate half
{ input: '\udc00\ud800', expected: '\ufffd\ufffd' } // Wrong order
];
badStrings.forEach(
function(t) {
var encoded = new TextEncoder().encode(t.input);
var decoded = new TextDecoder().decode(encoded);
assert_equals(t.expected, decoded);
});
}, 'bad data');
test(function() {
var bad = [
{ encoding: 'utf-8', input: [0xC0] }, // ends early
{ encoding: 'utf-8', input: [0xC0, 0x00] }, // invalid trail
{ encoding: 'utf-8', input: [0xC0, 0xC0] }, // invalid trail
{ encoding: 'utf-8', input: [0xE0] }, // ends early
{ encoding: 'utf-8', input: [0xE0, 0x00] }, // invalid trail
{ encoding: 'utf-8', input: [0xE0, 0xC0] }, // invalid trail
{ encoding: 'utf-8', input: [0xE0, 0x80, 0x00] }, // invalid trail
{ encoding: 'utf-8', input: [0xE0, 0x80, 0xC0] }, // invalid trail
{ encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
{ encoding: 'utf-16le', input: [0x00] }, // truncated code unit
{ encoding: 'utf-16le', input: [0x00, 0xd8] }, // surrogate half
{ encoding: 'utf-16le', input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half
{ encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate
{ encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0xd8] } // swapped surrogates
// TODO: Single byte encoding cases
];
bad.forEach(
function(t) {
assert_throws({name: 'TypeError'}, function() {
new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input));
});
});
}, 'fatal flag');
test(function() {
var encodings = [
{ label: 'utf-8', encoding: 'utf-8' },
{ label: 'utf-16', encoding: 'utf-16le' },
{ label: 'utf-16le', encoding: 'utf-16le' },
{ label: 'utf-16be', encoding: 'utf-16be' },
{ label: 'ascii', encoding: 'windows-1252' },
{ label: 'iso-8859-1', encoding: 'windows-1252' }
];
encodings.forEach(
function(test) {
assert_equals(new TextDecoder(test.label.toLowerCase()).encoding, test.encoding);
assert_equals(new TextDecoder(test.label.toUpperCase()).encoding, test.encoding);
});
}, 'Encoding names are case insensitive');
test(function() {
var utf8_bom = [0xEF, 0xBB, 0xBF];
var utf8 = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD];
var utf16le_bom = [0xff, 0xfe];
var utf16le = [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF];
var utf16be_bom = [0xfe, 0xff];
var utf16be = [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD];
var string = 'z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD'; // z, cent, CJK water, G-Clef, Private-use character
// missing BOMs
assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8)), string);
assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le)), string);
assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be)), string);
// matching BOMs
assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8))), string);
assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le))), string);
assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be))), string);
// matching BOMs split
var decoder8 = new TextDecoder('utf-8');
assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), {stream: true}), '');
assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))), string);
assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), {stream: true}), '');
assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))), string);
var decoder16le = new TextDecoder('utf-16le');
assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {stream: true}), '');
assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))), string);
var decoder16be = new TextDecoder('utf-16be');
assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {stream: true}), '');
assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))), string);
// mismatching BOMs
assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8))), string);
assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8))), string);
assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le))), string);
assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le))), string);
assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be))), string);
assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be))), string);
// ignore BOMs
assert_equals(new TextDecoder('utf-8', {ignoreBOM: true})
.decode(new Uint8Array(utf8_bom.concat(utf8))),
'\uFEFF' + string);
assert_equals(new TextDecoder('utf-16le', {ignoreBOM: true})
.decode(new Uint8Array(utf16le_bom.concat(utf16le))),
'\uFEFF' + string);
assert_equals(new TextDecoder('utf-16be', {ignoreBOM: true})
.decode(new Uint8Array(utf16be_bom.concat(utf16be))),
'\uFEFF' + string);
}, 'Byte-order marks');
test(function() {
assert_equals(new TextDecoder('utf-8').encoding, 'utf-8'); // canonical case
assert_equals(new TextDecoder('UTF-16').encoding, 'utf-16le'); // canonical case and name
assert_equals(new TextDecoder('UTF-16BE').encoding, 'utf-16be'); // canonical case and name
assert_equals(new TextDecoder('iso8859-1').encoding, 'windows-1252'); // canonical case and name
assert_equals(new TextDecoder('iso-8859-1').encoding, 'windows-1252'); // canonical case and name
}, 'Encoding names');
test(function() {
var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF';
var cases = [
{
encoding: 'utf-8',
encoded: [0, 49, 50, 51, 65, 66, 67, 97, 98, 99, 194, 128, 195, 191, 196,
128, 225, 128, 128, 239, 191, 189, 240, 144, 128, 128, 244, 143,
191, 191]
},
{
encoding: 'utf-16le',
encoded: [0, 0, 49, 0, 50, 0, 51, 0, 65, 0, 66, 0, 67, 0, 97, 0, 98, 0,
99, 0, 128, 0, 255, 0, 0, 1, 0, 16, 253, 255, 0, 216, 0, 220,
255, 219, 255, 223]
},
{
encoding: 'utf-16be',
encoded: [0, 0, 0, 49, 0, 50, 0, 51, 0, 65, 0, 66, 0, 67, 0, 97, 0, 98, 0,
99, 0, 128, 0, 255, 1, 0, 16, 0, 255, 253, 216, 0, 220, 0, 219,
255, 223, 255]
}
];
cases.forEach(function(c) {
for (var len = 1; len <= 5; ++len) {
var out = '', decoder = new TextDecoder(c.encoding);
for (var i = 0; i < c.encoded.length; i += len) {
var sub = [];
for (var j = i; j < c.encoded.length && j < i + len; ++j) {
sub.push(c.encoded[j]);
}
out += decoder.decode(new Uint8Array(sub), {stream: true});
}
out += decoder.decode();
assert_equals(out, string, 'streaming decode ' + c.encoding);
}
});
}, 'Streaming Decode');
test(function() {
var jis = [0x82, 0xC9, 0x82, 0xD9, 0x82, 0xF1];
var expected = '\u306B\u307B\u3093'; // Nihon
assert_equals(new TextDecoder('shift_jis').decode(new Uint8Array(jis)), expected);
}, 'Shift_JIS Decode');
test(function() {
ASCII_SUPERSETS.forEach(function(encoding) {
var string = '', bytes = [];
for (var i = 0; i < 128; ++i) {
// Encodings that have escape codes in 0x00-0x7F
if (encoding === 'iso-2022-jp' &&
(i === 0x0E || i === 0x0F || i === 0x1B))
continue;
string += String.fromCharCode(i);
bytes.push(i);
}
var ascii_encoded = new TextEncoder().encode(string);
assert_equals(new TextDecoder(encoding).decode(ascii_encoded), string, encoding);
});
}, 'Supersets of ASCII decode ASCII correctly');
test(function() {
assert_throws({name: 'TypeError'}, function() { new TextDecoder('utf-8', {fatal: true}).decode(new Uint8Array([0xff])); });
// This should not hang:
new TextDecoder('utf-8').decode(new Uint8Array([0xff]));
assert_throws({name: 'TypeError'}, function() { new TextDecoder('utf-16le', {fatal: true}).decode(new Uint8Array([0x00])); });
// This should not hang:
new TextDecoder('utf-16le').decode(new Uint8Array([0x00]));
assert_throws({name: 'TypeError'}, function() { new TextDecoder('utf-16be', {fatal: true}).decode(new Uint8Array([0x00])); });
// This should not hang:
new TextDecoder('utf-16be').decode(new Uint8Array([0x00]));
}, 'Non-fatal errors at EOF');
test(function() {
LEGACY_ENCODINGS.forEach(function(encoding) {
assert_equals(new TextDecoder(encoding).encoding, encoding);
assert_equals(new TextEncoder(encoding).encoding, 'utf-8');
});
}, 'Legacy encodings supported only for decode, not encode');
test(function() {
[
'csiso2022kr',
'hz-gb-2312',
'iso-2022-cn',
'iso-2022-cn-ext',
'iso-2022-kr'
].forEach(function(encoding) {
assert_equals(new TextEncoder(encoding).encoding, 'utf-8');
assert_throws({name: 'RangeError'},
function() {
var decoder = new TextDecoder(encoding, {fatal: true});
});
assert_throws({name: 'RangeError'},
function() {
var decoder = new TextDecoder(encoding, {fatal: false});
});
});
}, 'Replacement encoding labels');
test(function() {
var decoder = new TextDecoder();
var bytes = [65, 66, 97, 98, 99, 100, 101, 102, 103, 104, 67, 68, 69, 70, 71, 72];
var chars = 'ABabcdefghCDEFGH';
var buffer = new Uint8Array(bytes).buffer;
assert_equals(decoder.decode(buffer), chars,
'Decoding from ArrayBuffer should match expected text.');
['Uint8Array', 'Int8Array', 'Uint8ClampedArray',
'Uint16Array', 'Int16Array',
'Uint32Array', 'Int32Array',
'Float32Array', 'Float64Array'].forEach(function(typeName) {
var type = self[typeName];
var array = new type(buffer);
assert_equals(decoder.decode(array), chars,
'Decoding from ' + typeName + ' should match expected text.');
var subset = new type(buffer, type.BYTES_PER_ELEMENT, 8 / type.BYTES_PER_ELEMENT);
assert_equals(decoder.decode(subset),
chars.substring(type.BYTES_PER_ELEMENT, type.BYTES_PER_ELEMENT + 8),
'Decoding from ' + typeName + ' should match expected text.');
});
}, 'ArrayBuffer, ArrayBufferView and buffer offsets');
test(function() {
assert_throws({name: 'RangeError'},
function() { new TextDecoder(null); },
'Null should coerce to "null" and be invalid encoding name.');
assert_throws({name: 'TypeError'},
function() { new TextDecoder('utf-8', ''); },
'String should not coerce to dictionary.');
assert_throws({name: 'TypeError'},
function() { new TextDecoder('utf-8').decode(null, ''); },
'String should not coerce to dictionary.');
}, 'Invalid parameters');
test(function() {
assert_array_equals(
[249,249,249,233,249,235,249,234,164,81,164,202],
new TextEncoder('big5', {NONSTANDARD_allowLegacyEncoding: true})
.encode('\u2550\u255E\u2561\u256A\u5341\u5345'));
}, 'NONSTANDARD - regression tests');
test(function() {
// Regression test for https://github.com/whatwg/encoding/issues/22
assert_equals(
new TextDecoder('gb18030').decode(new Uint8Array([
0xA8, 0xBC,
0x81, 0x35, 0xF4, 0x37
])), '\u1E3F\uE7C7');
}, 'GB 18030 2000 vs 2005: U+1E3F, U+E7C7 (decoding)');
test(function() {
// Regression test for https://github.com/whatwg/encoding/issues/22
assert_array_equals(
new TextEncoder('gb18030', {NONSTANDARD_allowLegacyEncoding: true})
.encode('\u1E3F\uE7C7'),
[
0xA8, 0xBC,
0x81, 0x35, 0xF4, 0x37
]);
}, 'NONSTANDARD - GB 18030 2000 vs 2005: U+1E3F, U+E7C7 (encoding)');
test(function() {
// Regression test for https://github.com/whatwg/encoding/issues/17
assert_throws(
new TypeError,
function() {
new TextEncoder('gb18030', {NONSTANDARD_allowLegacyEncoding: true})
.encode('\uE5E5');
});
}, 'NONSTANDARD - gb18030: U+E5E5 (encoding)');
test(function() {
// Regression test for https://github.com/whatwg/encoding/issues/15
var encoder =
new TextEncoder('iso-2022-jp', {NONSTANDARD_allowLegacyEncoding: true});
[
//'\u000E', '\u000F', '\u001B',
'\u00A5\u000E', //'\u00A5\u000F', '\u00A5\u001B'
].forEach(function(s) {
assert_throws(new TypeError, function() { encoder.encode(s); });
});
}, 'NONSTANDARD - iso-2022-jp encoding attack (encoding)');
['utf-16le', 'utf-16be'].forEach(function(encoding) {
test(function() {
var encoder = new TextEncoder(encoding, {NONSTANDARD_allowLegacyEncoding: true});
var decoder = new TextDecoder(encoding);
var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";
assert_equals(decoder.decode(encoder.encode(sample)), sample);
}, 'NONSTANDARD - ' + encoding + ' (encoding)');
});
test(function() {
var encoder = new TextEncoder();
assert_array_equals([].slice.call(encoder.encode(false)), [102, 97, 108, 115, 101]);
assert_array_equals([].slice.call(encoder.encode(0)), [48]);
}, 'encode() called with falsy arguments (polyfill bindings)');
test(function() {
// Regression test for https://github.com/inexorabletash/text-encoding/issues/59
assert_array_equals(
new TextDecoder('windows-1255').decode(new Uint8Array([0xCA])), '\u05BA');
}, 'windows-1255 map 0xCA to U+05BA');