Skip to content

Instantly share code, notes, and snippets.

@tsudoko
tsudoko / .gitignore
Last active November 20, 2020 05:32
Official kanji list scrapers
__pycache__
#!/usr/bin/env python3
import argparse
import ast
import sys
encodings = ["ascii", "big5", "big5hkscs", "cp037", "cp424", "cp437", "cp500", "cp720", "cp737", "cp775", "cp850", "cp852", "cp855", "cp856", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp869", "cp874", "cp875", "cp932", "cp949", "cp950", "cp1006", "cp1026", "cp1140", "cp1250", "cp1251", "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "euc_jp", "euc_jis_2004", "euc_jisx0213", "euc_kr", "gb2312", "gbk", "gb18030", "hz", "iso2022_jp", "iso2022_jp_1", "iso2022_jp_2", "iso2022_jp_2004", "iso2022_jp_3", "iso2022_jp_ext", "iso2022_kr", "latin_1", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6", "iso8859_7", "iso8859_8", "iso8859_9", "iso8859_10", "iso8859_13", "iso8859_14", "iso8859_15", "iso8859_16", "johab", "koi8_r", "koi8_u", "mac_cyrillic", "mac_greek", "mac_iceland", "mac_latin2", "mac_roman", "mac_turkish", "ptcp154", "shift_jis", "shift_jis_2004", "shift_jisx0213", "utf_3
#!/usr/bin/env python3
import collections
import os
import sys
CDXFile = collections.namedtuple("CDXFile", ["sep", "fields", "file"])
essential_fields = frozenset("aku")
warn_warc_dedup = True
def say(*args):
_SPACE = "\u0020\u0009\u000a\u000c\u000d"
_POS_OUTSIDE = 0
_POS_URL = 1
_POS_DESCRIPTOR = 2
def urls(srcset):
# URLs may contain commas, so we can't just .split(',')
pos = _POS_OUTSIDE
url = ""
#!/usr/bin/env python3
import json
import sys
import bs4
import requests
# this is public domain
# A00100C is pc-9800, untested on other groups
@tsudoko
tsudoko / 0001-kernelbase-locale-Implement-comparison-on-top-of-off.patch
Last active February 17, 2020 22:56
Wine patches used in personal builds
From: Fabian Maurer <dark.shadow4@web.de>
Subject: [PATCH v3 1/2] kernelbase/locale: Implement comparison on top of official unicode weight tables
Message-Id: <20200215192748.382909-1-dark.shadow4@web.de>
Date: Sat, 15 Feb 2020 20:27:47 +0100
This is the first patch to get proper string comparison.
The algorithm is loosely based on MS-UCODEREF, and the tables
are taken from the official Microsoft download.
We start by implementing the sortkey step by step;
this first version is enough to not break any existing tests.
# adjust as needed
fdisk /dev/sda
# /dev/sda1 200M /boot
# /dev/sda2 300G /home
# /dev/sda3 20G /
mkfs.ext2 /dev/sda1
mkfs.ext4 /dev/sda2
mkfs.ext4 /dev/sda3
@tsudoko
tsudoko / long-live-pomf.md
Last active November 4, 2019 03:29
Pomf.se alternatives

Moved to GitHub due to requests, see

#!/usr/bin/env python3
from bs4 import BeautifulSoup
import urllib.request
import os.path
import sys
def get_contents_plain(html):
soup = BeautifulSoup(html)
#!/usr/bin/env python3
from os.path import basename
import argparse
import json
import os
import sys
import urllib.parse
import urllib.request
if sys.stdout.isatty():