Skip to content

Instantly share code, notes, and snippets.

Avatar
:octocat:
🍡 NLP 🍡

Junki Ohmura jojonki

:octocat:
🍡 NLP 🍡
View GitHub Profile
@jojonki
jojonki / bpe.py
Created Jun 17, 2020
BPE (Neural Machine Translation of Rare Words with Subword Units, Rico Sennrich.)
View bpe.py
import collections
import re
def get_stats(vocab):
pairs = collections.defaultdict(int)
for word, freq in vocab.items():
symbols = word.split()
for i in range(len(symbols)-1):
pairs[symbols[i],symbols[i+1]] += freq
@jojonki
jojonki / .direnvrc
Last active May 3, 2020
direnv for anaconda which automatically switches anaconda environments.
View .direnvrc
# Load conda's shell integration into the current shell, then activate an
# environment.
#   $1: name (or path) of the conda environment to activate. When it is unset
#       or empty, `conda activate` is invoked with no argument, as before.
activate_conda(){
  # Evaluate the hook script printed by conda so `conda activate` is usable here.
  eval "$(conda shell.zsh hook)"
  # Pass the argument quoted so a name/path containing spaces or glob
  # characters reaches conda as a single word; the `:+` form drops the
  # argument entirely when $1 is unset or empty instead of passing "".
  conda activate ${1:+"$1"}
}
View lower_bound_and_upper_bound.py
# "Meguru-style" binary search, as explained by drken (けんちょん)
# https://qiita.com/drken/items/97e37dd6143e33a64c8c
def lowerBound(nums, key):
"""numsの中でkey以上の要素のうちの一番左のインデックス"""
ng, ok = -1, len(nums)
while abs(ok - ng) > 1:
mid = (ok + ng) // 2
if key <= nums[mid]:
ok = mid
@jojonki
jojonki / convert.py
Created Jan 17, 2020
Convert latex to text with pandocfilters
View convert.py
"""Remove latex blocks and replace math functions with (MATH)
- How to debug:
pandoc -t json aaa.tex | python ./this-file.py | pandoc -f json -t plain
- How to use:
pandoc -s aaa.tex --filter=./this-file.py -o out.text
"""
from pandocfilters import toJSONFilter, Str
@jojonki
jojonki / launch.json
Last active Jan 18, 2020
python vscode sandbox
View launch.json
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"pythonPath": "${config:python.pythonPath}",
@jojonki
jojonki / plotly_orca_example.py
Last active Dec 12, 2019
Call quay.io/plotly/orca API
View plotly_orca_example.py
"""Use plotly orca (Docker) to save png/html files.
Run plotly-orca server before running this code.
% docker run -d -p 9091:9091 quay.io/plotly/orca
I referred to the following forum post when writing this code, and slightly modified the code so that the imports work.
https://community.plot.ly/t/plotly-io-with-external-orca/25600/2
"""
import io
import requests
@jojonki
jojonki / timeshift_sony_actioncam.py
Created Oct 27, 2019
Timeshift modification for Sony's actioncam on Mac
View timeshift_sony_actioncam.py
from datetime import datetime
from pytz import timezone
import subprocess
import glob
import xml.etree.ElementTree as ET
# replaced time zones
SRC_TZ_ZONES = ['UTC+09:00', 'UTC+01:00']
# target time zone
TGT_TZ_ZONE = ['Europe/Berlin']
@jojonki
jojonki / global_variables.py
Created Feb 4, 2019
Hack for read-only class variables.
View global_variables.py
# Metaclass that exposes HOGE as a getter-only property on the classes that use it
# (no setter is defined in this class).
class MetaGlobalVariables(type):
@property
def HOGE(cls):
# `_GlobalVariables__hoge` is the name-mangled form of a `__hoge` attribute
# declared inside a class named `GlobalVariables`, so the lookup is tied to that class name.
return cls._GlobalVariables__hoge # mangling
class GlobalVariables(object, metaclass=MetaGlobalVariables):
__hoge = 'xxxx'
# read OK
@jojonki
jojonki / simple_viterbi.py
Created Jan 3, 2019
Simple viterbi algorithm example
View simple_viterbi.py
# Use Graham's example.
# http://www.phontron.com/slides/nlp-programming-ja-03-ws.pdf
INF = 1e6
edge_list = [
None, # e0
{ # e1
'id': 1,
'score': 2.5,
'begin_node_id': 0,
View gist:cb18b14e789eb8bdb2ae2950eef1019f
defaults write com.adobe.illustrator AppleLanguages '("ja")'
You can’t perform that action at this time.