Skip to content

Instantly share code, notes, and snippets.

View nikopartanen's full-sized avatar
🐱

Niko Partanen nikopartanen

🐱
View GitHub Profile
import wave
from deepspeech import Model, version
import numpy as np
import sys
import subprocess
from pipes import quote
import shlex
import pympi
# This code is essentially taken from the DeepSpeech native_client repository
@nikopartanen
nikopartanen / run_copius_transliterator.py
Created February 4, 2021 13:30
An example script to run Copius transliterator with Python
# This is an example by Niko Partanen of how to use the Copius transliterator from Python.
# The idea is to replicate the form that the website uses and send that information
# there directly. To the website, it essentially looks as if someone were simply using
# it normally.
import requests
from lxml import html
def run_copius_transliterator(language, text, direction):
library(tidyverse)
library(lingtypology)
# Read the settlement table from the langdoc/kpv-geography repository; the
# language, dialect, settlement, latitude and longitude columns are used below.
kpv <- read_csv("https://raw.githubusercontent.com/langdoc/kpv-geography/master/kpv.csv")
# Map every row with lingtypology: the dialect is passed as the feature, the
# settlement name as the label, and the row's own coordinates as the position.
map.feature(languages = kpv$language,
features = kpv$dialect,
label = kpv$settlement,
latitude = kpv$latitude,
longitude = kpv$longitude)
import xml.etree.cElementTree as ET
from PIL import Image
import numpy as np
from google.protobuf.json_format import MessageToJson
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pathlib import Path
import sklearn as sk
from pathlib import Path
import xml.etree.cElementTree as ET
import re
# Information about these superscripts and subscripts is marked in elements like this:
# textStyle {offset:13; length:1;superscript:true;}
# From these we need to extract when they occur and how long they are
def get_offset_info(offsets):
@nikopartanen
nikopartanen / compile_language.sh
Last active January 16, 2020 13:20
setting-up-giellatekno-infra.md
# Configure, build and install one language from /mnt/data/trunk/langs.
# Usage: compile_language.sh <lang>
#   <lang> is the name of a directory under /mnt/data/trunk/langs.

# Abort with a usage message when no language is given; otherwise the steps
# below would run against the langs/ directory itself.
lang=${1:?usage: compile_language.sh <lang>}
langdir=/mnt/data/trunk/langs/$lang

# Export GTLANG_<lang> pointing at the language directory.
export "GTLANG_$lang=$langdir"

# Stop if the directory does not exist, so that make never runs in whatever
# directory the script happened to be started from.
cd "$langdir" || exit 1

# A failing "make clean" (e.g. on a tree that has never been configured) is
# deliberately not fatal: the remaining steps still run.
make clean
./autogen.sh
./configure --with-hfst --without-xfst --enable-tokenisers --enable-reversed-intersect --enable-alignment --enable-apertium --enable-dicts --enable-morpher
make
make install
@nikopartanen
nikopartanen / NeedlemanWunsch.png
Created May 20, 2019 10:47 — forked from slowkow/NeedlemanWunsch.png
Plot the score matrix from a Needleman-Wunsch pairwise sequence alignment
NeedlemanWunsch.png
### This is Niko Partanen's example R script that splits the National
### Library of Finland's dataset OCR Ground Truth Pages (Swedish Fraktur)
### into pairs of line images and text files that can be used for training
### models with Tesseract. The same approach also works easily with Ocropy.
### Data source:
### https://digi.kansalliskirjasto.fi/opendata
library(tidyverse)
library(xml2)
library(measurements)
library(overpass)
library(sf)

# Overpass QL query: take every area whose name matches one of the listed
# regions (Adygea, Kabardino-Balkaria, Karachay-Cherkessia, Stavropol Krai,
# Krasnodar Krai), then select the nodes inside those areas whose "place" tag
# matches city, village, town, hamlet or isolated_dwelling.
settlements <- 'area[name~"Адыгея|Кабардино-Балкария|Карачаево-Черкесия|Ставропольский край|Краснодарский край"];
(node["place"~"city|village|town|hamlet|isolated_dwelling"](area););
out;'

# Send the query with overpass_query() and keep the result.
query_result <- overpass_query(settlements)
@nikopartanen
nikopartanen / cyrillic2latin_file_renamer.py
Created December 2, 2017 13:12 — forked from etrushkin/cyrillic2latin_file_renamer.py
Cyrillic to Latin File Changer
#!/usr/bin/python
# -*- coding: utf-8 -*-
# http://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python
from __future__ import print_function
import sys
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    Takes the same positional and keyword arguments as ``print`` and forwards
    them unchanged. Do not pass ``file``: it is already supplied as
    ``sys.stderr``, so a second value raises ``TypeError``.
    """
    print(*args, file=sys.stderr, **kwargs)
import errno