Skip to content

Instantly share code, notes, and snippets.

@en30
Created September 27, 2018 05:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save en30/57d46f7560be7ab3f36493153cbbc1c9 to your computer and use it in GitHub Desktop.
Save en30/57d46f7560be7ab3f36493153cbbc1c9 to your computer and use it in GitHub Desktop.
handle charset
import { AxiosResponse } from 'axios';
import contentType = require('content-type');
import cheerio = require('cheerio');
/** Name of a character encoding, e.g. `'utf-8'`. */
type Charset = string;
/** Result of one detection step: the charset it found, or `null` if it found none. */
type IntermediateResult = Charset | null;

/**
 * Turns a `[charset, bytes]` pair into a `[charset, Buffer]` pair.
 * The explicit tuple return type stops TypeScript from widening the result
 * to `Array<string | Buffer>`.
 */
const bomify = ([c, bytes]: [Charset, number[]]): [Charset, Buffer] =>
  [c, Buffer.from(bytes)];

/**
 * Raw byte-order-mark signatures paired with the charset each one announces.
 *
 * Kept as an ordered list rather than a Map keyed by charset: several
 * signatures share the same charset (UTF-7 has five), and a Map would keep
 * only the last entry per key.
 */
const BOM_BYTES: Array<[Charset, number[]]> = [
  ['utf-8', [0xEF, 0xBB, 0xBF]],
  ['utf-16be', [0xFE, 0xFF]],
  ['utf-16le', [0xFF, 0xFE]],
  // U+FEFF in UTF-7 starts with "+/v"; the next character is one of
  // "8", "9", "+" or "/" depending on the bits that follow the BOM.
  ['utf-7', [0x2B, 0x2F, 0x76, 0x38]],
  ['utf-7', [0x2B, 0x2F, 0x76, 0x39]],
  ['utf-7', [0x2B, 0x2F, 0x76, 0x2B]],
  // Was 0x3F ("?"), which is not a base64 character; the fourth variant is "/".
  ['utf-7', [0x2B, 0x2F, 0x76, 0x2F]],
  ['utf-7', [0x2B, 0x2F, 0x76, 0x38, 0x2D]],
  ['utf-1', [0xF7, 0x64, 0x4C]],
  ['utf-ebcdic', [0xDD, 0x73, 0x66, 0x73]],
  ['scsu', [0x0E, 0xFE, 0xFF]],
  ['bocu-1', [0xFB, 0xEE, 0x28]],
  ['gb-18030', [0x84, 0x31, 0x95, 0x33]],
];

/** BOM signatures as Buffers, in declaration order; iterate as `[charset, bom]` pairs. */
const BOMS: ReadonlyArray<[Charset, Buffer]> = BOM_BYTES.map(bomify);
/**
 * Detects the charset announced by a byte order mark at the start of `buf`.
 *
 * @param buf - Raw bytes to inspect.
 * @returns The charset of the first `BOMS` entry whose signature `buf` begins
 *   with, or `null` when none matches.
 */
export const fromBOM = (buf: Buffer): IntermediateResult => {
  for (const [charset, bom] of BOMS) {
    // A buffer shorter than the signature yields a shorter view, which never equals it.
    if (buf.subarray(0, bom.length).equals(bom)) return charset;
  }
  return null;
};
/**
 * Extracts the `charset` parameter from a Content-Type header value.
 *
 * @param ctype - Header value such as `text/html; charset=utf-8`; may be absent.
 * @returns The declared charset, or `null` when the value is missing, cannot
 *   be parsed, or carries no `charset` parameter.
 */
export const fromHeader = (ctype: string | null | undefined): IntermediateResult => {
  // No header at all: nothing to parse, and `contentType.parse` would throw.
  if (!ctype) return null;
  try {
    return contentType.parse(ctype).parameters.charset || null;
  } catch (err) {
    // `contentType.parse` signals a malformed value with a TypeError; treat that
    // as "no charset found" so detection can fall through to the next step.
    if (err instanceof TypeError) return null;
    throw err;
  }
};
/**
 * Looks for a charset declaration in the document's `<meta>` tags.
 *
 * Checks `<meta charset="…">` first, then the `content` attribute of a
 * `<meta http-equiv="Content-Type" content="…">` tag.
 *
 * @param buf - Raw document bytes.
 * @returns The declared charset, or `null` when neither tag yields one.
 */
export const fromMetaTag = (buf: Buffer): IntermediateResult => {
  // The bytes are decoded as ASCII only so the meta tags can be located.
  const $ = cheerio.load(buf.toString('ascii'));

  const declared = $('meta[charset]').attr('charset');
  if (declared) return declared;

  // `http-equiv` values are case-insensitive in HTML, so compare the lowercased
  // value rather than relying on an exact-case attribute selector.
  const pragma = $('meta[http-equiv]')
    .filter((_, el) => ($(el).attr('http-equiv') || '').toLowerCase() === 'content-type')
    .attr('content');
  if (pragma) return fromHeader(pragma);

  return null;
};
/**
 * Picks the charset to decode a response body with.
 *
 * Sources are tried in order and the first non-empty answer wins: byte order
 * mark, `content-type` response header, `<meta>` tag, then the `'utf-8'` default.
 *
 * @param res - Axios response; `res.data` is handed to the BOM and meta-tag checks.
 * @returns The detected charset, or `'utf-8'` when no source declares one.
 */
export const detect = (res: AxiosResponse): Charset => {
  const viaBom = fromBOM(res.data);
  if (viaBom) return viaBom;

  const viaHeader = fromHeader(res.headers["content-type"]);
  if (viaHeader) return viaHeader;

  const viaMeta = fromMetaTag(res.data);
  if (viaMeta) return viaMeta;

  return 'utf-8';
};
import axios from 'axios';
import iconv = require('iconv-lite');
import * as charset from './charset';
// Example usage: fetch a page as raw bytes, then decode it with the detected charset.
(async () => {
  // Request an 'arraybuffer' response so the body arrives undecoded.
  const res = await axios.get(url, { responseType: 'arraybuffer' });
  const encoding = charset.detect(res);
  const body = iconv.decode(res.data, encoding);
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment