// [Repository web-viewer text captured together with the file; not part of the module.]
// You cannot select more than 25 topics.
// Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
// File: 143 lines, 4.8 KiB, JavaScript; last modified 1 year ago.
"use strict";
|
||
|
const { isASCIIHex } = require("./infra");
|
||
|
const { utf8Encode } = require("./encoding");
|
||
|
|
||
|
/**
 * Shorthand for spelling a numeric code point as a readable character literal.
 *
 * @param {string} char - String whose first code point is wanted.
 * @returns {number|undefined} Code point at index 0, or `undefined` for an empty string.
 */
function p(char) {
  return char.codePointAt(0);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#percent-encode
/**
 * Formats a number as `%` followed by its uppercase hexadecimal form,
 * left-padded with a zero when the hex form is a single digit.
 *
 * @param {number} c - Value to encode; callers in this file pass single bytes.
 * @returns {string} For example `%0A` for 10 and `%FF` for 255.
 */
function percentEncode(c) {
  const hex = c.toString(16).toUpperCase();
  return hex.length === 1 ? `%0${hex}` : `%${hex}`;
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence. Each `%` (0x25) that is followed by two ASCII hex digit bytes is
 * replaced by the single byte those digits denote; every other byte is copied through unchanged.
 *
 * @param {Uint8Array} input - Bytes to decode (read through `byteLength` and numeric indexing).
 * @returns {Uint8Array} A new array holding the decoded bytes; never longer than `input`.
 */
function percentDecodeBytes(input) {
  // Decoding can only shrink the data, so the input length is a safe upper bound.
  const output = new Uint8Array(input.byteLength);
  let outputIndex = 0;

  for (let i = 0; i < input.byteLength; ++i) {
    const byte = input[i];

    // The explicit bounds check keeps a truncated escape at the very end of the input ("%", "%4")
    // literal without relying on how isASCIIHex treats an out-of-range (undefined) read.
    const isEscape = byte === 0x25 &&
      i + 2 < input.byteLength &&
      isASCIIHex(input[i + 1]) &&
      isASCIIHex(input[i + 2]);

    if (isEscape) {
      output[outputIndex++] = parseInt(String.fromCodePoint(input[i + 1], input[i + 2]), 16);
      // Skip the two hex digits that were just consumed.
      i += 2;
    } else {
      output[outputIndex++] = byte;
    }
  }

  // Trim the unused tail of the over-allocated buffer.
  return output.slice(0, outputIndex);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Percent-decodes a string: the string is first converted to bytes with `utf8Encode`, and those
 * bytes are then run through `percentDecodeBytes`.
 *
 * @param {string} input - String that may contain `%XX` escapes.
 * @returns {Uint8Array} The decoded bytes (not a string), as returned by `percentDecodeBytes`.
 */
function percentDecodeString(input) {
  const bytes = utf8Encode(input);
  return percentDecodeBytes(bytes);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#c0-control-percent-encode-set
/**
 * Tests membership in the C0 control percent-encode set: everything up to and including 0x1F,
 * and everything above 0x7E (`~`).
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` must be percent-encoded under this set.
 */
function isC0ControlPercentEncode(c) {
  return c <= 0x1F || c > 0x7E;
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#fragment-percent-encode-set
// Values added on top of the C0 control percent-encode set: space, ", <, > and `.
const extraFragmentPercentEncodeSet = new Set([p(" "), p("\""), p("<"), p(">"), p("`")]);

/**
 * Tests membership in the fragment percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the C0 control percent-encode set or is one of the extras above.
 */
function isFragmentPercentEncode(c) {
  return isC0ControlPercentEncode(c) || extraFragmentPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#query-percent-encode-set
// Values added on top of the C0 control percent-encode set: space, ", #, < and >.
const extraQueryPercentEncodeSet = new Set([p(" "), p("\""), p("#"), p("<"), p(">")]);

/**
 * Tests membership in the query percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the C0 control percent-encode set or is one of the extras above.
 */
function isQueryPercentEncode(c) {
  return isC0ControlPercentEncode(c) || extraQueryPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#special-query-percent-encode-set
/**
 * Tests membership in the special-query percent-encode set: the query percent-encode set plus
 * the apostrophe (').
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` must be percent-encoded under this set.
 */
function isSpecialQueryPercentEncode(c) {
  return isQueryPercentEncode(c) || c === p("'");
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#path-percent-encode-set
// Values added on top of the query percent-encode set: ?, `, { and }.
const extraPathPercentEncodeSet = new Set([p("?"), p("`"), p("{"), p("}")]);

/**
 * Tests membership in the path percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the query percent-encode set or is one of the extras above.
 */
function isPathPercentEncode(c) {
  return isQueryPercentEncode(c) || extraPathPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#userinfo-percent-encode-set
// Values added on top of the path percent-encode set: / : ; = @ [ \ ] ^ and |.
const extraUserinfoPercentEncodeSet =
  new Set([p("/"), p(":"), p(";"), p("="), p("@"), p("["), p("\\"), p("]"), p("^"), p("|")]);

/**
 * Tests membership in the userinfo percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the path percent-encode set or is one of the extras above.
 */
function isUserinfoPercentEncode(c) {
  return isPathPercentEncode(c) || extraUserinfoPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#component-percent-encode-set
// Values added on top of the userinfo percent-encode set: $, %, &, + and ,.
const extraComponentPercentEncodeSet = new Set([p("$"), p("%"), p("&"), p("+"), p(",")]);

/**
 * Tests membership in the component percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the userinfo percent-encode set or is one of the extras above.
 */
function isComponentPercentEncode(c) {
  return isUserinfoPercentEncode(c) || extraComponentPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
// Values added on top of the component percent-encode set: !, ', (, ) and ~.
const extraURLEncodedPercentEncodeSet = new Set([p("!"), p("'"), p("("), p(")"), p("~")]);

/**
 * Tests membership in the application/x-www-form-urlencoded percent-encode set.
 *
 * @param {number} c - Byte / code point value to test.
 * @returns {boolean} `true` when `c` is in the component percent-encode set or is one of the extras above.
 */
function isURLEncodedPercentEncode(c) {
  return isComponentPercentEncode(c) || extraURLEncodedPercentEncodeSet.has(c);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#code-point-percent-encode-after-encoding
// https://url.spec.whatwg.org/#utf-8-percent-encode
// Assuming encoding is always utf-8 allows us to trim one of the logic branches. TODO: support encoding.
// The "-Internal" variant here has code points as JS strings. The external version used by other files has code points
// as JS numbers, like the rest of the codebase.
/**
 * Encodes one code point: every byte returned by `utf8Encode` for it is either percent-encoded
 * (when the predicate accepts the byte) or appended as the character with that char code.
 *
 * @param {string} codePoint - The code point, given as a JS string.
 * @param {function(number): boolean} percentEncodePredicate - Called once per byte; `true` means "encode it".
 * @returns {string} The concatenated result for all bytes.
 */
function utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate) {
  const bytes = utf8Encode(codePoint);
  let output = "";

  for (const byte of bytes) {
    // Our percentEncodePredicate operates on bytes, not code points, so this is slightly different from the spec.
    if (percentEncodePredicate(byte)) {
      output += percentEncode(byte);
    } else {
      output += String.fromCharCode(byte);
    }
  }

  return output;
}
|
||
|
|
||
|
/**
 * Numeric-code-point front end: converts the number to a JS string with `String.fromCodePoint`
 * and delegates to `utf8PercentEncodeCodePointInternal`.
 *
 * @param {number} codePoint - Code point as a number.
 * @param {function(number): boolean} percentEncodePredicate - Passed through unchanged to the internal variant.
 * @returns {string} Whatever the internal variant returns for that code point.
 */
function utf8PercentEncodeCodePoint(codePoint, percentEncodePredicate) {
  return utf8PercentEncodeCodePointInternal(String.fromCodePoint(codePoint), percentEncodePredicate);
}
|
||
|
|
||
|
// https://url.spec.whatwg.org/#string-percent-encode-after-encoding
// https://url.spec.whatwg.org/#string-utf-8-percent-encode
/**
 * Percent-encodes a whole string, one code point at a time.
 *
 * @param {string} input - String to encode.
 * @param {function(number): boolean} percentEncodePredicate - Per-byte predicate handed to
 *   `utf8PercentEncodeCodePointInternal`.
 * @param {boolean} [spaceAsPlus=false] - When true, a space is written as `+` and the predicate is
 *   not consulted for it.
 * @returns {string} The encoded string.
 */
function utf8PercentEncodeString(input, percentEncodePredicate, spaceAsPlus = false) {
  let output = "";

  // Iterating a string with for...of yields whole code points, so surrogate pairs stay together.
  for (const codePoint of input) {
    if (spaceAsPlus && codePoint === " ") {
      output += "+";
    } else {
      output += utf8PercentEncodeCodePointInternal(codePoint, percentEncodePredicate);
    }
  }

  return output;
}
|
||
|
|
||
|
module.exports = {
|
||
|
isC0ControlPercentEncode,
|
||
|
isFragmentPercentEncode,
|
||
|
isQueryPercentEncode,
|
||
|
isSpecialQueryPercentEncode,
|
||
|
isPathPercentEncode,
|
||
|
isUserinfoPercentEncode,
|
||
|
isURLEncodedPercentEncode,
|
||
|
percentDecodeString,
|
||
|
percentDecodeBytes,
|
||
|
utf8PercentEncodeString,
|
||
|
utf8PercentEncodeCodePoint
|
||
|
};
|