Skip to content

Instantly share code, notes, and snippets.

Learning PhD from home

Wannaphong Phatthiyaphaibun wannaphong

Learning PhD from home
View GitHub Profile
korakot /
Created Nov 15, 2017
Google colab file upload/download
files.download('example.txt')  # from colab to browser download
korakot /
Created Oct 30, 2017
Thai datetime in python
import datetime, pytz
tz = pytz.timezone('Asia/Bangkok')
def now():
    """Return the current time in timezone ``tz`` as a Thai-formatted string.

    The result has the form ``"<day> <Thai month name> <year + 543> <HH:MM:SS>"``,
    for example ``30 ตุลาคม 2560 20:45:30``.

    Relies on the module-level ``tz`` object for the timezone.
    """
    now1 = datetime.datetime.now(tz)
    # The leading 'x' is a placeholder so that the list index equals the
    # month number (1-12) without subtracting one.
    month_name = 'x มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤศจิกายน ธันวาคม'.split()[now1.month]
    # Thai (Buddhist Era) year is the Gregorian year plus 543.
    thai_year = now1.year + 543
    time_str = now1.strftime('%H:%M:%S')
    return "%d %s %d %s" % (now1.day, month_name, thai_year, time_str)  # 30 ตุลาคม 2560 20:45:30
korakot /
Last active Oct 26, 2017
A simplified pure-python thaisort
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import re
import icu
thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey
except ImportError:
def thkey(word):
cv = re.sub('[็-์]', '', word) # remove tone
korakot /
Last active Jan 18, 2020
Longest matching Thai word tokenization
from marisa_trie import Trie
# wordlist = ...
trie = Trie(wordlist)
def lmcut(text):
for w in reversed(trie.prefixes(text)):
if w==text:
yield [w]
korakot /
Last active Feb 1, 2018
Return all possible ways to cut(tokenize) Thai text.
import re
from collections import defaultdict
from marisa_trie import Trie
wordlist = [li.strip() for li in open('wordlist.txt')]
trie = Trie(wordlist) # สร้างครั้งเดียว ข้างนอก function
class LatticeString(str):
''' String subclass เพื่อเก็บวิธีตัดหลายๆ วิธี
korakot /
Last active Apr 13, 2020
Thai Soundex LK82, Udom83
# ตาม
import re
t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ",
t2 = str.maketrans(
def LK82(s):
res = []
avinassh / container.xml
Created Oct 27, 2015 — forked from anqxyr/archived
Create EPUB files with Python
View container.xml
<?xml version='1.0' encoding='UTF-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
stewartpark /
Created Oct 12, 2015
Simple XOR learning with keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
X = np.array([[0,0],[0,1],[1,0],[1,1]])
y = np.array([[0],[1],[1],[0]])
model = Sequential()
model.add(Dense(8, input_dim=2))
fheisler /
Created Mar 31, 2015
Q-learning Tic-tac-toe
import random
class TicTacToe:
def __init__(self, playerX, playerO):
self.board = [' ']*9
self.playerX, self.playerO = playerX, playerO
self.playerX_turn = random.choice([True, False])
def play_game(self):
# We start by loading up PyICU.
import PyICU as icu
# Let's create a test text. Notice it contains some punctuation.
test = u"This is (\"a\") test!"
# We create a wordbreak iterator. All break iterators in ICU are really RuleBasedBreakIterators, and we need to tell it which locale to take the word break rules from. Most locales have the same rules for UAX#29 so we will use English.
wb = icu.BreakIterator.createWordInstance(icu.Locale.getEnglish())
# An iterator is just that. It contains state and then we iterate over it. The state in this case is the text we want to break. So we set that.