Skip to content

Instantly share code, notes, and snippets.

@nberlette
Last active February 17, 2024 07:50
Show Gist options
  • Save nberlette/b15e05982e6fb33bc726f76f54d47b62 to your computer and use it in GitHub Desktop.
Save nberlette/b15e05982e6fb33bc726f76f54d47b62 to your computer and use it in GitHub Desktop.
`DataURL`: a custom subclass of `URL` to handle data-uri strings
/**
* User-extensible type for encoding types that can be used with the `DataURL`
* class. This interface can be extended using TypeScript's declaration merging,
* to supplement the built-in encoding types.
*
* Each key is an encoding label and its value is the name that label is mapped
* to (for example `"utf8"` maps to `"utf-8"` and `"percent"` maps to `"uri"`).
* The mapping exists at the type level only; nothing in this interface is
* emitted at runtime. */
// deno-lint-ignore no-empty-interface
export interface EncodingTypeMap {
"uri": "uri";
"url": "uri";
"uri-component": "uri";
"percent": "uri";
"base64": "base64";
"base64url": "base64";
"utf-8": "utf-8";
"utf8": "utf-8";
"us-ascii": "us-ascii";
"ascii": "us-ascii";
"utf-16": "utf-16";
"utf16": "utf-16";
"utf-16le": "utf-16";
"utf-16be": "utf-16";
"utf16le": "utf-16";
"utf16be": "utf-16";
}
/** Any string key of `EncodingTypeMap`, i.e. a known encoding label. */
export type Encoding = string & keyof EncodingTypeMap;
/** A known `Encoding` label, or any other string. */
export type MaybeEncoding = strings | Encoding;
/** Anything accepted where a URL is expected: a string, a `URL` or a `DataURL`. */
export type MaybeURL = string | URL | DataURL;
/**
* `string` intersected with `{}`.
* NOTE(review): presumably used so that `strings | Encoding` keeps its literal
* members visible to editor completion instead of collapsing to `string` —
* confirm.
*/
type strings = string & {};
/**
 * A `URL` subclass with helpers for reading and rewriting `data:` URLs of the
 * form `data:[<mime type>][;base64],<payload>`.
 *
 * Conventions used throughout the class:
 * - The payload is everything between `data:` and the fragment, so a `?` in
 *   the payload is kept even though `URL` reports it as `search`.
 * - A URL is base64-encoded when its header ends in `;base64`; every other
 *   data URL is treated as percent-encoded (reported as `"uri"`).
 * - Text is converted to and from bytes as UTF-8, for both encodings.
 * - Setters rewrite the URL through `href`, because assigning `pathname` is a
 *   no-op for URLs with an opaque path such as `data:` URLs.
 */
export class DataURL extends URL {
  /**
   * @param url The URL to wrap, normally a `data:` URL.
   * @param base Optional base URL, forwarded to `URL`.
   *
   * When both arguments are falsy an empty `data:` URL is created.
   */
  constructor(url: MaybeURL, base?: MaybeURL) {
    // `super()` stays a single root-level statement so the class compiles for
    // targets where private names are down-levelled.
    super(
      !url && !base ? "data:" : url,
      !url && !base ? undefined : base,
    );
  }

  /**
   * Returns `true` when the header carries the `;base64` flag and the payload
   * is well-formed base64 (standard or URL-safe alphabet, padding optional).
   */
  isBase64(): boolean {
    const [header, body] = DataURL.#split(this);
    return DataURL.#isBase64Header(header) &&
      DataURL.#normalizeBase64(DataURL.decodeURI(body)) !== null;
  }

  /** The payload bytes as padded, standard-alphabet base64. */
  get base64(): string {
    return DataURL.#bytesToBase64(DataURL.#payloadBytes(this));
  }

  /**
   * Replaces the payload and marks the URL as `;base64`. A value that is
   * already well-formed base64 is stored as-is (normalised to the standard
   * padded alphabet); any other value is treated as text and encoded.
   */
  set base64(value: string) {
    const text = String(value);
    const body = DataURL.#normalizeBase64(text) ?? DataURL.encodeBase64(text);
    DataURL.#write(this, DataURL.#split(this)[0], true, body);
  }

  /** The payload bytes as unpadded, URL-safe base64. */
  get base64url(): string {
    return DataURL.#toUrlSafe(this.base64);
  }

  /**
   * Same as the `base64` setter, but the stored payload uses the URL-safe
   * alphabet without padding.
   */
  set base64url(value: string) {
    const text = String(value);
    const body = DataURL.#normalizeBase64(text) ?? DataURL.encodeBase64(text);
    DataURL.#write(
      this,
      DataURL.#split(this)[0],
      true,
      DataURL.#toUrlSafe(body),
    );
  }

  /** The raw (still encoded) payload: everything after the first comma. */
  get data(): string {
    return DataURL.#split(this)[1];
  }

  /**
   * Stores `value` as the payload, encoded with the URL's current encoding.
   *
   * A value of the form `base64,<data>` or `utf-8,<data>` additionally
   * switches the encoding: `<data>` is kept when it already is valid base64
   * (otherwise it is encoded), or is re-encoded as percent-encoded text.
   * Any other text, including text that merely contains a comma, is stored
   * verbatim.
   */
  set data(value: string) {
    const text = String(value);
    const [header] = DataURL.#split(this);
    let base64 = DataURL.#isBase64Header(header);
    let body: string | undefined;

    const comma = text.indexOf(",");
    if (comma > -1) {
      const prefix = DataURL.normalizeEncoding(text.slice(0, comma));
      const rest = text.slice(comma + 1);
      if (prefix === "base64" || prefix === "base64url") {
        base64 = true;
        body = DataURL.#normalizeBase64(DataURL.decodeURI(rest)) ??
          DataURL.encodeBase64(rest);
      } else if (prefix === "utf-8" || prefix === "utf8") {
        base64 = false;
        // Decode first so already-escaped input is not escaped twice.
        body = DataURL.encodeURI(DataURL.decodeURI(rest));
      }
    }

    body ??= base64 ? DataURL.encodeBase64(text) : DataURL.encodeURI(text);
    DataURL.#write(this, header, base64, body);
  }

  /**
   * Decodes the payload to text: base64 payloads are decoded as UTF-8 (with a
   * binary-string fallback, see `decodeBase64`), all others are
   * percent-decoded.
   */
  decode(): string {
    const [header, body] = DataURL.#split(this);
    return DataURL.#isBase64Header(header)
      ? DataURL.decodeBase64(body)
      : DataURL.decodeURI(body);
  }

  /** `"base64"` when the header ends in `;base64`, otherwise `"uri"`. */
  get encoding(): Encoding {
    const [header] = DataURL.#split(this);
    return DataURL.#isBase64Header(header) ? "base64" : "uri";
  }

  /**
   * Re-encodes the payload. Values starting with `base64` switch to base64;
   * every other value switches to percent-encoding. The payload bytes are
   * preserved across the switch.
   */
  set encoding(value: Encoding | strings) {
    const toBase64 = DataURL.normalizeEncoding(value).startsWith("base64");
    const [header] = DataURL.#split(this);
    const bytes = DataURL.#payloadBytes(this);
    const body = toBase64
      ? DataURL.#bytesToBase64(bytes)
      : DataURL.#percentEncodeBytes(bytes);
    DataURL.#write(this, header, toBase64, body);
  }

  /**
   * The header without the `;base64` flag, e.g. `text/plain;charset=utf-8`.
   * Empty when the URL has no header.
   */
  get mimeType(): string {
    return DataURL.#split(this)[0].replace(DataURL.#B64_FLAG_RE, "");
  }

  /**
   * Replaces the media type while keeping the payload and its encoding. Use
   * the `encoding` setter to change the encoding.
   */
  set mimeType(value: string) {
    const [header, body] = DataURL.#split(this);
    // Escape the characters that would end the header early.
    const mime = String(value).trim().replace(
      /[,#?]/g,
      (c) => encodeURIComponent(c),
    );
    DataURL.#write(this, mime, DataURL.#isBase64Header(header), body);
  }

  /**
   * Creates a `Blob` from the payload bytes, typed with `mimeType` (or
   * `application/octet-stream` when there is none).
   */
  toBlob(): Blob {
    const type = this.mimeType || "application/octet-stream";
    // Pass bytes, not a string: a string would be re-encoded as UTF-8 and
    // corrupt binary payloads.
    return new Blob([DataURL.#payloadBytes(this)], { type });
  }

  /** Creates a `blob:` object URL for `toBlob()`. */
  toBlobURL(): string {
    return DataURL.createObjectURL(this.toBlob());
  }

  /** Returns `href`. */
  override toJSON(): string {
    return this.href;
  }

  /** Returns `href`. */
  override toString(): string {
    return this.href;
  }

  // No `g` flag on regexes used with `.test()`: a global regex keeps
  // `lastIndex` between calls and would make every other call fail.
  static #URI_RE = /%[0-9a-f]{2}/i;
  static #B64_BODY_RE = /^[A-Za-z0-9+/]*$/;
  static #B64_FLAG_RE = /;\s*base64(?:url)?\s*$/i;
  static #encoder = new TextEncoder();
  static #decoder = new TextDecoder("utf-8", { fatal: true, ignoreBOM: true });
  /** Object URLs created through `createObjectURL`, oldest first. */
  static #urllru = new Map<string, Blob>();
  static #urllruLimit = 100;

  /** Returns the `header,payload` text of a URL, without the fragment. */
  static #payload(u: URL): string {
    if (u.protocol !== "data:") return u.pathname;
    const href = u.href;
    const hashAt = href.indexOf("#");
    return href.slice(u.protocol.length, hashAt < 0 ? undefined : hashAt);
  }

  /** Splits a URL into header and payload at the first comma. */
  static #split(u: URL): [header: string, body: string] {
    const raw = DataURL.#payload(u);
    const comma = raw.indexOf(",");
    return comma < 0 ? [raw, ""] : [raw.slice(0, comma), raw.slice(comma + 1)];
  }

  /** Rewrites header, base64 flag and payload of a URL, keeping its fragment. */
  static #write(u: URL, header: string, base64: boolean, body: string): void {
    const mime = header.replace(DataURL.#B64_FLAG_RE, "");
    const path = `${mime}${base64 ? ";base64" : ""},${body}`;
    if (u.protocol === "data:") {
      // The `pathname` setter ignores URLs with an opaque path, so the whole
      // URL has to be re-parsed through `href`.
      const hashAt = u.href.indexOf("#");
      const fragment = hashAt < 0 ? "" : u.href.slice(hashAt);
      u.href = `data:${path}${fragment}`;
    } else {
      u.pathname = path;
    }
  }

  /** Whether a header ends in the `;base64` flag. */
  static #isBase64Header(header: string): boolean {
    return DataURL.#B64_FLAG_RE.test(header);
  }

  /**
   * Returns `s` as padded, standard-alphabet base64, or `null` when `s` is not
   * base64. ASCII whitespace, the URL-safe alphabet and missing padding are
   * accepted.
   */
  static #normalizeBase64(s: string): string | null {
    const bare = s
      .replace(/[\t\n\f\r ]+/g, "")
      .replace(/-/g, "+")
      .replace(/_/g, "/")
      .replace(/={1,2}$/, "");
    if (bare.length % 4 === 1 || !DataURL.#B64_BODY_RE.test(bare)) return null;
    return bare + "=".repeat((4 - (bare.length % 4)) % 4);
  }

  /** Decodes base64 to a binary string, or `null` when `s` is not base64. */
  static #decodeBinary(s: string): string | null {
    const normalized = DataURL.#normalizeBase64(s);
    if (normalized === null) return null;
    try {
      return atob(normalized);
    } catch {
      return null;
    }
  }

  /** Converts a binary string (one char per byte) to bytes. */
  static #binaryToBytes(binary: string) {
    return Uint8Array.from(binary, (c) => c.charCodeAt(0));
  }

  /** Encodes bytes as padded, standard-alphabet base64. */
  static #bytesToBase64(bytes: Uint8Array): string {
    let binary = "";
    for (let i = 0; i < bytes.length; i++) {
      binary += String.fromCharCode(bytes[i] ?? 0);
    }
    return btoa(binary);
  }

  /** Percent-encodes bytes, as UTF-8 text when possible, else byte by byte. */
  static #percentEncodeBytes(bytes: Uint8Array): string {
    try {
      return DataURL.encodeURI(DataURL.#decoder.decode(bytes));
    } catch {
      // Not valid UTF-8: escape every byte so nothing is lost.
      return Array.from(
        bytes,
        (b) => `%${b.toString(16).toUpperCase().padStart(2, "0")}`,
      ).join("");
    }
  }

  /** Converts standard base64 to the unpadded URL-safe alphabet. */
  static #toUrlSafe(b64: string): string {
    return b64.replace(/\+/g, "-").replace(/\//g, "_").replace(/=/g, "");
  }

  /** Returns the decoded payload bytes of a URL. */
  static #payloadBytes(u: URL) {
    const [header, body] = DataURL.#split(u);
    if (DataURL.#isBase64Header(header)) {
      const binary = DataURL.#decodeBinary(DataURL.decodeURI(body));
      if (binary !== null) return DataURL.#binaryToBytes(binary);
      // Flagged as base64 but not decodable: fall back to treating it as text.
    }
    return DataURL.#encoder.encode(DataURL.decodeURI(body));
  }

  /**
   * Converts a string to ASCII where possible: compatibility-decomposes it,
   * drops combining marks (so `é` becomes `e`) and replaces every remaining
   * non-ASCII code unit with U+FFFD.
   */
  static convertToASCII(s: string): string {
    return String(s)
      .normalize("NFKD")
      .replace(/[\u0300-\u036F]/g, "")
      .replace(/[^\x00-\x7F]/g, "\uFFFD");
  }

  /**
   * Returns a well-formed copy of `s`: a UTF-8 round trip that replaces lone
   * surrogates with U+FFFD and leaves everything else untouched.
   */
  static convertToUnicode(s: string): string {
    return DataURL.#decoder.decode(DataURL.#encoder.encode(String(s)));
  }

  /**
   * Creates an object URL and remembers it. Once more than the internal limit
   * of URLs are tracked, the oldest ones are revoked.
   */
  static override createObjectURL(blob: Blob): string {
    const tracked = DataURL.#urllru;
    const url = super.createObjectURL(blob);
    tracked.set(url, blob);
    while (tracked.size > DataURL.#urllruLimit) {
      const oldest = tracked.keys().next().value;
      if (oldest === undefined) break;
      // Forget the entry before revoking so the loop always makes progress.
      tracked.delete(oldest);
      super.revokeObjectURL(oldest);
    }
    return url;
  }

  /** Revokes an object URL and stops tracking it. */
  static override revokeObjectURL(url: string): void {
    DataURL.#urllru.delete(url);
    super.revokeObjectURL(url);
  }

  /**
   * Decodes `s` with the given encoding: `base64` and `base64url` go through
   * `decodeBase64`, everything else through `decodeURI`.
   */
  static decode(s: string, encoding: Encoding = "uri"): string {
    s = String(s);
    if (encoding === "base64" || encoding === "base64url") {
      return DataURL.decodeBase64(s);
    }
    return DataURL.decodeURI(s);
  }

  /**
   * Decodes base64 (standard or URL-safe, padding optional) to text. The
   * input is percent-decoded first. Bytes are read as UTF-8; bytes that are
   * not valid UTF-8 are returned as a binary string (one char per byte).
   * Input that is not base64 is returned percent-decoded but otherwise
   * unchanged.
   */
  static decodeBase64(s: string): string {
    const text = DataURL.decodeURI(String(s));
    const binary = DataURL.#decodeBinary(text);
    if (binary === null) return text;
    try {
      return DataURL.#decoder.decode(DataURL.#binaryToBytes(binary));
    } catch {
      return binary;
    }
  }

  /**
   * Percent-decodes `s`. Input without escapes, or with escapes that do not
   * form valid UTF-8, is returned unchanged.
   */
  static decodeURI(s: string): string {
    s = String(s);
    if (DataURL.#URI_RE.test(s)) {
      try {
        return decodeURIComponent(s);
      } catch {
        // Malformed escape sequence: best effort, hand the input back.
      }
    }
    return s;
  }

  /**
   * Encodes `s` with one encoding, or with several applied in reverse
   * alphabetical order (so `uri` runs before `base64`). The array passed in
   * is not modified. An empty array behaves like `"uri"`.
   */
  static encode(s: string, encoding: Encoding | Encoding[] = "uri"): string {
    s = String(s);
    if (Array.isArray(encoding)) {
      if (encoding.length > 1) {
        // Copy before sorting so the caller's array keeps its order.
        const order = [...encoding].sort().reverse();
        return order.reduce((acc, enc) => DataURL.encode(acc, enc), s);
      }
      encoding = encoding[0] ?? "uri";
    }
    if (encoding === "base64") return DataURL.encodeBase64(s);
    if (encoding === "base64url") return DataURL.encodeBase64Url(s);
    return DataURL.encodeURI(s);
  }

  /**
   * Encodes text as padded base64 of its UTF-8 bytes. The input is always
   * encoded, even when it happens to look like base64 itself.
   */
  static encodeBase64(s: string): string {
    return DataURL.#bytesToBase64(DataURL.#encoder.encode(String(s)));
  }

  /** Encodes text as unpadded, URL-safe base64 of its UTF-8 bytes. */
  static encodeBase64Url(s: string): string {
    return DataURL.#toUrlSafe(DataURL.encodeBase64(s));
  }

  /**
   * Percent-encodes `s` with `encodeURIComponent`. `null` and `undefined`
   * yield an empty string. Lone surrogates, which `encodeURIComponent`
   * rejects, are replaced with U+FFFD instead of throwing.
   */
  static encodeURI(s: string): string {
    if (s == null) return "";
    const text = String(s);
    try {
      return encodeURIComponent(text);
    } catch {
      return encodeURIComponent(DataURL.convertToUnicode(text));
    }
  }

  /** Returns the value of the first `charset=` parameter, or `utf-8`. */
  static extractCharset(s: string): string {
    return String(s).match(/(?<=charset=)[^;]+/)?.[0] ?? "utf-8";
  }

  /**
   * Returns the last `;`-separated token before the first comma, or `utf-8`
   * when there is none. The token is not validated.
   */
  static extractEncoding(s: string): Encoding {
    return (String(s).match(/(?<=;)[^;]+?(?=,)/)?.[0] ?? "utf-8") as Encoding;
  }

  /**
   * Returns everything before the first `;` or `,`, or
   * `text/plain;charset=us-ascii` for an empty header.
   */
  static extractMimeType(s: string): string {
    return String(s).match(/^[^;,]+/)?.[0] ?? "text/plain;charset=us-ascii";
  }

  /**
   * Trims and lower-cases an encoding label, then drops every character that
   * is not a letter, digit, `-` or `=`. The result is not checked against the
   * known labels.
   */
  static normalizeEncoding(encoding: MaybeEncoding): Encoding {
    const enc = String(encoding).trim().toLowerCase();
    return enc.replace(/[^a-z0-9\-=]+/g, "") as Encoding;
  }

  /**
   * Percent-escapes C0/C1 control characters and DEL, then reduces the rest
   * to ASCII with `convertToASCII`.
   */
  static sanitize(s: string): string {
    const escaped = String(s).replace(
      /[\x00-\x1F\x7F-\x9F]/g,
      (c) => encodeURIComponent(c),
    );
    return DataURL.convertToASCII(escaped);
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment