Skip to content

Instantly share code, notes, and snippets.

View TinoDidriksen's full-sized avatar
🐺

Tino Didriksen TinoDidriksen

🐺
View GitHub Profile
@TinoDidriksen
TinoDidriksen / Dockerfile
Last active May 10, 2021 11:45
giella-sma-linux
# Base image: Debian sid (the unstable distribution), taken from the amd64/ image namespace.
FROM amd64/debian:sid
# Use a UTF-8 C locale and put debconf/apt in non-interactive mode so package
# installation never waits for answers during the image build.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 DEBIAN_FRONTEND=noninteractive DEBCONF_NONINTERACTIVE_SEEN=true
# Upgrade everything and install base builder dependencies
# Every RUN step below refreshes the package index before it installs anything.
# apt-get flags used: -q quiet output, -y assume "yes", -f attempt to fix broken
# dependencies; --no-install-recommends leaves out merely recommended packages.
# Step 1: install apt-utils before anything else.
# NOTE(review): presumably this is to avoid debconf's "apt-utils is not installed" warning in later steps - confirm.
RUN apt-get -qy update && apt-get -qfy --no-install-recommends install apt-utils
# Step 2: bring every package already in the base image up to date.
RUN apt-get -qy update && apt-get -qfy --no-install-recommends dist-upgrade
# Step 3: compiler toolchain (build-essential) plus fakeroot, time and the gperftools development package.
RUN apt-get -qy update && apt-get -qfy --no-install-recommends install build-essential fakeroot time libgoogle-perftools-dev
# OS dependencies
#!/usr/bin/env perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
use warnings;
use strict;
use utf8;
BEGIN {
$| = 1;
binmode(STDIN, ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');
@TinoDidriksen
TinoDidriksen / 01-freenode.txt
Last active June 15, 2021 12:49
Freenode Status
-NickServ- Information on TinoDidriksen
-NickServ- Registered : Mar 21 17:25:41 2009
-ChanServ- Information on #apertium
-ChanServ- Founder : spectei
-ChanServ- Registered : Jan 09 18:28:40 2007
-ChanServ- Information on #hfst
-ChanServ- Founder : spectei, Nikerabbit
-ChanServ- Registered : Oct 07 11:15:41 2010
// Lookup table of element names, each mapped to true.
// The names are spelled in upper-case because they are compared against DOM nodeName values.
let text_nodes = {
	ADDRESS: true, ARTICLE: true, ASIDE: true, AUDIO: true,
	BLOCKQUOTE: true, BODY: true,
	CANVAS: true,
	DD: true, DIV: true, DL: true,
	FIELDSET: true, FIGCAPTION: true, FIGURE: true, FOOTER: true, FORM: true,
	H1: true, H2: true, H3: true, H4: true, H5: true, H6: true,
	HEADER: true, HGROUP: true, HTML: true, HR: true,
	LI: true,
	MAIN: true,
	NAV: true, NOSCRIPT: true,
	OL: true, OUTPUT: true,
	P: true, PRE: true,
	SECTION: true,
	TABLE: true, TD: true, TH: true,
	UL: true,
	VIDEO: true
};
function findTextNodes(nodes, filter) {
let tns = [], wsx = /\S/;
if (!$.isArray(nodes)) {
nodes = [nodes];
}
<word>En</word>
"<en>"
"en" det mask ent kvant
"en" adv
"en" pron ent pers hum
"ene" verb imp
<word>serbisk</word>
"<serbisk>"
"serbisk" adj ub m/f ent pos
"serbisk" adj nøyt ub ent pos
# Remove a trailing PHP closing tag ("?>") from files under the current directory.
#  1. The first grep recursively lists files that contain '<?php'.
#  2. The second grep narrows that list to files that also contain '?>'.
#  3. php -r is then run once per remaining file (xargs -n1; -r skips the run on empty input).
# The inline PHP reads the file and, only if it ends in "?>" followed by nothing but
# whitespace, deletes that tag, trim()s the content at both ends, appends a single
# newline and writes the result back to the same path. Other files are left untouched.
# NOTE(review): paths are handed over through plain xargs, so file names containing
# whitespace or quotes will presumably be split incorrectly - confirm before use on such trees.
grep -rl '<?php' * | xargs -r grep -rl '?>' | xargs -rn1 php -r '$f=file_get_contents($argv[1]); if (preg_match("~[?]>[\s\n]*$~", $f)) { $f = trim(preg_replace("~[?]>[\s\n]*$~", "", $f))."\n"; file_put_contents($argv[1], $f); }' --
@TinoDidriksen
TinoDidriksen / 01-msys.sh
Created October 5, 2021 10:20
MSYS2 CMake + Boost Program Options
$ pacman -S cmake make mingw-w64-x86_64-gcc mingw-w64-x86_64-boost
echo -e 'Nuuk\nQaanaaq\nAasiaat' | ~/langtech/kal/tools/shellscripts/kal-tokenise | cg-conv -c -F | grep -F Prop+Abs | lg-multi.pl
@TinoDidriksen
TinoDidriksen / 01-katersat-had-multiple.txt
Last active March 7, 2023 14:43
Katersat Danish adjectives
aftagende
akut
almindelig
belgisk
betydningsfuld
blød
bæredygtig
cirkulær
delelig
deltagende
#!/usr/bin/env php
<?php
/*
USAGE:
./cg-combsets.php kalcg.dansyn kalcg.dan kalcg.dep kalcg.extra kalcg.pre > sets.cg3
Parses all given grammars and spits out all sets used across all grammars, and marks conflicts for manual inspection.
Used to create a combined deduplicated sets.cg3 that multiple grammars can Include.