Skip to content

Instantly share code, notes, and snippets.

@wincentbalin
wincentbalin / handwriter.py
Created July 11, 2024 12:26 — forked from kastnerkyle/handwriter.py
Single file handwriting experiment
# Author: Kyle Kastner
# License: BSD 3-clause
# Thanks to Jose (@sotelo) for tons of guidance and debug help
# Credit also to Junyoung (@jych) and Shawn (@shawntan) for help/utility funcs
# Strangeness in init could be from onehots, via @igul222. Ty init for one hot layer as N(0, 1) just as in embedding
# since oh.dot(w) is basically an embedding
import os
import re
import tarfile
from collections import Counter
@wincentbalin
wincentbalin / metrum.html
Created December 10, 2023 12:22
Time calculations for media
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width">
<title>Media time converter</title>
<style type="text/css">
body {
margin: 0 auto;
max-width: 27em
@wincentbalin
wincentbalin / music-comb.ny
Last active November 12, 2023 20:48
Audacity plug-in: Comb filter for pitch frequencies between 260 and 4000 Hz
;nyquist plug-in
;version 4
;type process
;preview linear
;name (_ "Music comb filter")
;debugbutton false
;author (_ "Wincent Balin")
;copyright (_ "GNU General Public License v2.0 or later")
;control Q (_ "Q (higher value reduces width)") float-text "" 1 0.1 1000
@wincentbalin
wincentbalin / telegram_markdown_corpus.py
Created January 31, 2023 10:18
Convert Telegram JSON export to Markdown
#!/usr/bin/env python3
"""Convert Telegram channel JSON export to a Markdown text corpus"""
import sys
import json
import argparse
parser = argparse.ArgumentParser(description=sys.modules[__name__].__doc__)
parser.add_argument('export_json', help='Exported JSON file', type=argparse.FileType('r', encoding='utf-8'))
parser.add_argument('markdown_corpus', help='Markdown text corpus', type=argparse.FileType('w', encoding='utf-8'))
args = parser.parse_args()
@wincentbalin
wincentbalin / Makefile
Created January 19, 2022 21:42
Training Cuneiform for Tesseract 3
# Train Tesseract OCR for Akkadian language
LANG = akk
CORPUS = corpus-12pt.txt
LANGDATA_ROOT = ../langdata
FREQ_DAWG_SIZE = 100
FONTS := "CuneiformNAOutline Medium" "CuneiformOB" "CuneiformComposite" "Segoe UI Historic"
FONTSJOINED := CuneiformNAOutlineMedium CuneiformOB CuneiformComposite SegoeUIHistoric
EXPOSURES := 0
@wincentbalin
wincentbalin / oracc-export.py
Created December 21, 2021 20:08
ORACC cuneiform text scraper
#!/usr/bin/env python
"""Export cuneiform corpus from ORACC
"""
import sys
import os
import argparse
import logging
import re
import shutil
@wincentbalin
wincentbalin / aktuellparser.py
Last active July 13, 2021 20:35
Generate German laws
import re
from html.parser import HTMLParser
RE_TEILLISTE = re.compile(r'/Teilliste_\w\.html$', re.IGNORECASE)
def get_url(attrs):
"""Find href attribute and join it with base URL"""
for key, value in attrs:
if key == 'href':
return urljoin(START_URL, value)
@wincentbalin
wincentbalin / giitotext.xsl
Last active July 20, 2021 22:14
Transform XML files from gesetze-im-internet.de to text
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="utf-8" omit-xml-declaration="yes"/>
<xsl:strip-space elements="*"/>
<xsl:variable name="newline"><xsl:text>
</xsl:text></xsl:variable>
<xsl:variable name="space"><xsl:text> </xsl:text></xsl:variable>
<xsl:variable name="tab" select="concat($space, $space, $space, $space)"/>
<xsl:template match="/dokumente">
@wincentbalin
wincentbalin / Vagrantfile
Last active May 17, 2020 11:45
Vagrantfile for CouchDB with Python
# -*- mode: ruby -*-
# vi: set ft=ruby :
Vagrant.configure("2") do |config|
required_plugins = %w( vagrant-vbguest vagrant-disksize )
_retry = false
required_plugins.each do |plugin|
unless Vagrant.has_plugin? plugin
system "vagrant plugin install #{plugin}"
_retry=true
--- export/batch_tester/batch_tester.cc.orig 2016-02-09 12:38:34.000000000 +0000
+++ export/batch_tester/batch_tester.cc 2019-04-27 20:04:32.358891300 +0000
@@ -37,11 +37,11 @@
using thrax::GrmManager;
using thrax::InputBuffer;
using thrax::OpenOrDie;
-using thrax::Split;
+//using thrax::Split;
-typedef StringCompiler<StdArc> Compiler;