| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
/**
 * Estimate decoded byte length of a `data:` URL *without* allocating large buffers.
 * This comment describes the default export, `estimateDataURLDecodedBytes`, defined below.
 * - For base64: compute exact decoded size using length and padding;
 *   handle %XX at the character-count level (no string allocation).
 * - For non-base64: compute the exact percent-decoded UTF-8 byte length.
 *
 * @param {string} url
 * @returns {number}
 */
/**
 * Tell whether a UTF-16 code unit is an ASCII hexadecimal digit.
 *
 * @param {number} charCode - Code unit, e.g. a value returned by `String.prototype.charCodeAt`.
 * @returns {boolean} `true` for '0'-'9', 'A'-'F' and 'a'-'f'; `false` for anything else.
 */
const isHexDigit = (charCode) => {
  if (charCode >= 48 && charCode <= 57) {
    return true; // '0'..'9'
  }
  if (charCode >= 65 && charCode <= 70) {
    return true; // 'A'..'F'
  }
  return charCode >= 97 && charCode <= 102; // 'a'..'f'
};
-
/**
 * Check whether the two characters following index `i` form the `XX` part of a
 * percent-encoded byte (`%XX`). The character at `i` itself is not inspected, so
 * callers test for '%' before calling.
 *
 * @param {string} str - Text being scanned.
 * @param {number} i - Index of the candidate '%' character.
 * @param {number} len - Length of `str` (passed in so callers can cache it).
 * @returns {boolean} `true` when both `str[i + 1]` and `str[i + 2]` exist and are hex digits.
 */
const isPercentEncodedByte = (str, i, len) => {
  // Both hex digits must lie inside the string.
  if (!(i + 2 < len)) {
    return false;
  }
  const high = str.charCodeAt(i + 1);
  if (!isHexDigit(high)) {
    return false;
  }
  const low = str.charCodeAt(i + 2);
  return isHexDigit(low);
};
-
/**
 * Estimate the decoded byte length of a `data:` URL without allocating large buffers.
 *
 * - base64 bodies: the size is derived from the number of base64 characters
 *   (a valid `%XX` triplet counts as one character) minus trailing padding.
 * - other bodies: the size is the UTF-8 byte length of the percent-decoded text.
 *
 * Relies on the module-level helper `isPercentEncodedByte`.
 *
 * @param {string} url - Candidate `data:` URL.
 * @returns {number} Estimated decoded size in bytes; 0 when `url` is not a
 *   non-empty string, does not start with `data:`, or has no comma separator.
 */
export default function estimateDataURLDecodedBytes(url) {
  if (!url || typeof url !== 'string') return 0;
  if (!url.startsWith('data:')) return 0;

  const comma = url.indexOf(',');
  if (comma < 0) return 0;

  // Everything between "data:" and the first comma is metadata; the rest is payload.
  const meta = url.slice(5, comma);
  const body = url.slice(comma + 1);

  return /;base64/i.test(meta) ? base64DecodedLength(body) : percentDecodedUtf8Length(body);
}

/**
 * Decoded size of a (possibly percent-encoded) base64 payload.
 *
 * @param {string} body - Text after the comma of a base64 `data:` URL.
 * @returns {number} Number of bytes the payload decodes to.
 */
function base64DecodedLength(body) {
  const len = body.length;

  // A valid %XX triplet stands for a single base64 character, so it
  // contributes one character to the count instead of three.
  let effectiveLen = len;
  for (let i = 0; i < len; i++) {
    if (body.charCodeAt(i) === 37 /* '%' */ && isPercentEncodedByte(body, i, len)) {
      effectiveLen -= 2;
      i += 2;
    }
  }

  // Width in source characters of a padding '=' ending at index j:
  // 1 for a literal '=', 3 for "%3D"/"%3d", 0 when there is no padding there.
  const paddingWidthAt = (j) => {
    if (j < 0) return 0;
    const c = body.charCodeAt(j);
    if (c === 61 /* '=' */) return 1;
    const isPct3D =
      j >= 2 &&
      body.charCodeAt(j - 2) === 37 && // '%'
      body.charCodeAt(j - 1) === 51 && // '3'
      (c === 68 || c === 100); // 'D' or 'd'
    return isPct3D ? 3 : 0;
  };

  // base64 carries at most two trailing padding characters.
  let pad = 0;
  let idx = len - 1;
  while (pad < 2) {
    const width = paddingWidthAt(idx);
    if (width === 0) break;
    pad++;
    idx -= width;
  }

  // Each data character carries 6 bits. Counting data characters (rather than
  // whole 4-character groups) keeps the result correct for unpadded input such
  // as "QQ", which lenient decoders still turn into one byte; for well-formed
  // padded input this equals groups * 3 - pad.
  const dataChars = effectiveLen - pad;
  return dataChars > 0 ? Math.floor((dataChars * 3) / 4) : 0;
}

/**
 * UTF-8 byte length of a percent-encoded text payload, computed straight from
 * UTF-16 code units so that no byte buffer is allocated (encoding the body just
 * to measure it would defeat the purpose of a size guard on large bodies).
 *
 * @param {string} body - Text after the comma of a non-base64 `data:` URL.
 * @returns {number} Number of bytes after percent-decoding.
 */
function percentDecodedUtf8Length(body) {
  const len = body.length;
  let bytes = 0;

  for (let i = 0; i < len; i++) {
    const c = body.charCodeAt(i);

    if (c === 37 /* '%' */ && isPercentEncodedByte(body, i, len)) {
      // A valid %XX triplet decodes to exactly one byte.
      bytes += 1;
      i += 2;
    } else if (c < 0x80) {
      bytes += 1;
    } else if (c < 0x800) {
      bytes += 2;
    } else if (c >= 0xd800 && c <= 0xdbff && i + 1 < len) {
      const next = body.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        // Surrogate pair: one code point outside the BMP, four UTF-8 bytes.
        bytes += 4;
        i++;
      } else {
        // Unpaired high surrogate is counted as a three-byte sequence.
        bytes += 3;
      }
    } else {
      // Remaining BMP code units, including lone surrogates: three bytes.
      bytes += 3;
    }
  }

  return bytes;
}
|