blob: cd8d41b5aa6e379c70503d4960d0928d0664b37c [file] [log] [blame]
<!DOCTYPE html>
<html>
<head>
<meta charset="csiso2022jp"> <!-- test breaks if the server overrides this -->
<title>csiso2022jp encoding (form)</title>
<meta name="timeout" content="long">
<meta name="variant" content="?1-1000">
<meta name="variant" content="?1001-2000">
<meta name="variant" content="?2001-3000">
<meta name="variant" content="?3001-4000">
<meta name="variant" content="?4001-5000">
<meta name="variant" content="?5001-6000">
<meta name="variant" content="?6001-7000">
<meta name="variant" content="?7001-last">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script src="/common/subset-tests.js"></script>
<script src="jis0208_index.js"></script>
<script src="iso2022jp-encoder.js"></script>
<link rel="author" title="Richard Ishida" href="mailto:ishida@w3.org">
<link rel="help" href="https://encoding.spec.whatwg.org/#names-and-labels">
<meta name="assert" content="The browser produces the same encoding behavior for a document labeled 'csiso2022jp' as for a document labeled 'iso-2022-jp'.">
<style>
iframe { display:none }
form { display:none }
</style>
</head>
<body>
<div id="log"></div>
<script src="../../resources/ranges.js"></script>
<script>
// Page-level configuration globals. NOTE(review): presumably read by
// ../../resources/encode-form-common.js, which is loaded at the end of <body> —
// confirm against that script.
// Initialised to false; nothing in this file ever sets it to true.
var errors = false;
// A single control character, U+0019.
var separator = "\u0019";
// Alias for rangesAll, which is not defined in this file (presumably supplied by
// ../../resources/ranges.js — confirm).
var ranges = rangesAll;
function expect(result, codepoint) {
  // Identity hook: the value passed in as `result` is handed back unchanged.
  // `codepoint` is accepted as part of the signature but is never read here.
  var expected = result;
  return expected;
}
function encoder(str) {
  // Only the first code point of `str` is considered; anything after it is ignored.
  // The return value is whatever getByteSequence yields for that code point.
  var firstCodePoint = str.codePointAt(0);
  return getByteSequence(firstCodePoint);
}
function getByteSequence(cp) {
  // Derives the ISO-2022-JP byte sequence for a single code point, following the
  // encoder state machine of the Encoding spec (ascii / roman / jis0208 states).
  //
  // cp: integer, a code point number
  // returns: a string of percent-encoded bytes ("%XX" per byte, upper-case hex,
  //          always two digits), including any escape sequences needed to enter
  //          a state and to return to ASCII at the end; or null if the code
  //          point has no entry in indexcodepoints (i.e. it can't be encoded)
  //
  // NOTE(review): the Encoding Standard's ISO-2022-JP encoder additionally
  // rejects U+000E, U+000F and U+001B, and maps halfwidth katakana
  // (U+FF61..U+FF9F) through a separate katakana index. Neither step is
  // implemented here — confirm the tested ranges do not depend on them.

  var END_OF_STREAM = 2000000; // sentinel; larger than any code point (max 0x10FFFF)
  var ESC_TO_ASCII = "%1B%28%42"; // ESC ( B
  var ESC_TO_ROMAN = "%1B%28%4A"; // ESC ( J
  var ESC_TO_JIS0208 = "%1B%24%42"; // ESC $ B

  // Percent-encodes one byte value, zero-padded so values below 0x10 give "%0A", not "%A".
  function percentEncode(byteValue) {
    var hex = byteValue.toString(16).toUpperCase();
    return "%" + (hex.length < 2 ? "0" + hex : hex);
  }

  var pending = [cp, END_OF_STREAM];
  var out = "";
  var encState = "ascii";

  while (true) {
    var current = pending.shift();

    if (current === END_OF_STREAM) {
      if (encState === "ascii") {
        break;
      }
      // The stream must end in the ASCII state: emit the escape, then re-read the sentinel.
      pending.unshift(current);
      encState = "ascii";
      out += ESC_TO_ASCII;
      continue;
    }

    var isAscii = current >= 0x00 && current <= 0x7f;

    if (encState === "ascii" && isAscii) {
      out += percentEncode(current);
      continue;
    }

    if (encState === "roman") {
      // In the Roman set, bytes 0x5C and 0x7E stand for U+00A5 and U+203E,
      // so those two ASCII code points cannot be written directly.
      if (isAscii && current !== 0x5c && current !== 0x7e) {
        out += percentEncode(current);
        continue;
      }
      if (current === 0xa5) {
        out += "%5C";
        continue;
      }
      if (current === 0x203e) {
        out += "%7E";
        continue;
      }
    }

    if (encState !== "ascii" && isAscii) {
      // Switch to ASCII and process the same code point again.
      pending.unshift(current);
      encState = "ascii";
      out += ESC_TO_ASCII;
      continue;
    }

    if ((current === 0xa5 || current === 0x203e) && encState !== "roman") {
      // Switch to Roman and process the same code point again.
      pending.unshift(current);
      encState = "roman";
      out += ESC_TO_ROMAN;
      continue;
    }

    // U+2212 MINUS SIGN is encoded as U+FF0D FULLWIDTH HYPHEN-MINUS.
    if (current === 0x2212) {
      current = 0xff0d;
    }

    // Declared locally: the previous bare assignment created an implicit global.
    var ptr = indexcodepoints[current];
    if (ptr == null) {
      return null;
    }

    if (encState !== "jis0208") {
      // Switch to JIS X 0208 and process the same code point again.
      pending.unshift(current);
      encState = "jis0208";
      out += ESC_TO_JIS0208;
      continue;
    }

    // The pointer addresses a 94-column grid; each byte is offset by 0x21.
    var lead = Math.floor(ptr / 94) + 0x21;
    var trail = (ptr % 94) + 0x21;
    out += percentEncode(lead) + percentEncode(trail);
  }

  return out;
}
// Set up a sparse array of all Unicode code points listed in the jis0208 index.
// getByteSequence uses it to look up the pointer for a code point.
// If a code point occurs more than once in jis0208, the lowest pointer is kept,
// because later occurrences are skipped once an entry exists.
var indexcodepoints = []; // index is unicode cp, value is pointer
// The loop counter is declared explicitly: a bare `p = 0` creates an implicit
// global and throws a ReferenceError in strict mode.
for (var p = 0; p < jis0208.length; p++) {
  if (jis0208[p] != null && indexcodepoints[jis0208[p]] == null) {
    indexcodepoints[jis0208[p]] = p;
  }
}
</script>
<script src="../../resources/encode-form-common.js"></script>
</body>
</html>