Skip to content

Instantly share code, notes, and snippets.

Avatar
😅
Learning PhD from home

Wannaphong Phatthiyaphaibun wannaphong

😅
Learning PhD from home
View GitHub Profile
@korakot
korakot / colab_download.py
Created Nov 15, 2017
Google colab file upload/download
View colab_download.py
# Send example.txt from the Colab runtime to the user's machine as a browser download.
# NOTE(review): `files` is not imported in this snippet — presumably `google.colab.files`; confirm.
files.download('example.txt') # from colab to browser download
@korakot
korakot / thai_datetime.py
Created Oct 30, 2017
Thai datetime in python
View thai_datetime.py
import datetime, pytz
tz = pytz.timezone('Asia/Bangkok')
# Thai month names indexed by calendar month number (1-12). The leading 'x'
# is a placeholder for index 0 so no offset arithmetic is needed. Built once
# at import time instead of on every call.
_THAI_MONTHS = 'x มกราคม กุมภาพันธ์ มีนาคม เมษายน พฤษภาคม มิถุนายน กรกฎาคม สิงหาคม กันยายน ตุลาคม พฤศจิกายน ธันวาคม'.split()


def now(when=None):
    """Return a date-time formatted Thai-style, e.g. '30 ตุลาคม 2560 20:45:30'.

    Args:
        when: Optional ``datetime.datetime`` to format. It is formatted as
            given (no timezone conversion is applied). When omitted, the
            current time in the module-level ``tz`` timezone is used.

    Returns:
        A string of the form
        "<day> <Thai month name> <Buddhist-era year> <HH:MM:SS>".
    """
    if when is None:
        when = datetime.datetime.now(tz)
    month_name = _THAI_MONTHS[when.month]
    thai_year = when.year + 543  # Buddhist Era year = Gregorian year + 543
    time_str = when.strftime('%H:%M:%S')
    return "%d %s %d %s" % (when.day, month_name, thai_year, time_str)
@korakot
korakot / collation.py
Last active Oct 26, 2017
A simplified pure-python thaisort
View collation.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals, print_function
import re
try:
import icu
thkey = icu.Collator.createInstance(icu.Locale('th_TH')).getSortKey
except ImportError:
def thkey(word):
cv = re.sub('[็-์]', '', word) # remove tone
@korakot
korakot / lmcut.py
Last active Jan 18, 2020
Longest matching Thai word tokenization
View lmcut.py
from marisa_trie import Trie
# wordlist = ...
trie = Trie(wordlist)
def lmcut(text):
for w in reversed(trie.prefixes(text)):
if w==text:
yield [w]
else:
@korakot
korakot / multicut.py
Last active Feb 1, 2018
Return all possible ways to cut (tokenize) Thai text.
View multicut.py
import re
from collections import defaultdict
from marisa_trie import Trie
# Load the dictionary, one word per line. The context manager closes the file
# as soon as the list is built instead of leaving the handle open.
with open('wordlist.txt') as wordlist_file:
    wordlist = [li.strip() for li in wordlist_file]
trie = Trie(wordlist)  # built once, outside any function
class LatticeString(str):
''' String subclass เพื่อเก็บวิธีตัดหลายๆ วิธี
'''
@korakot
korakot / LK82.py
Last active Apr 13, 2020
Thai Soundex LK82, Udom83
View LK82.py
# Based on guru.sanook.com/1520
import re
# t1: translation table mapping each character of the first string to the
# character at the same position in the second string, so that several Thai
# consonants collapse onto one representative letter (e.g. ข ฃ ค ฅ ฆ -> ก).
t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ",
"กกกกกกงจชชชซซซซยยดดตตนนททททททบปพพพฟฟมรรรรรวหหอ")
# t2: translation table mapping Thai consonants and vowel signs, position by
# position, to a single code character in the range 1-9, A-F.
# NOTE(review): how t1 and t2 are applied is not visible in this excerpt —
# presumably by LK82() below; confirm against the full file.
t2 = str.maketrans(
"กขฃคฅฆงจฉชซฌฎฏฐฑฒดตถทธศษสญณนรลฬฤฦบปพฟภผฝมำยวไใหฮาๅึืเแโุูอ",
"1111112333333333333333333444444445555555667777889AAABCDEEF")
def LK82(s):
res = []
@avinassh
avinassh / container.xml
Created Oct 27, 2015 — forked from anqxyr/archived
Create EPUB files with Python
View container.xml
<?xml version='1.0' encoding='UTF-8'?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile media-type="application/oebps-package+xml" full-path="content.opf"/>
</rootfiles>
</container>
@stewartpark
stewartpark / xor.py
Created Oct 12, 2015
Simple XOR learning with keras
View xor.py
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
import numpy as np
# Training inputs: the four possible pairs of binary values.
X = np.array([[0,0],[0,1],[1,0],[1,1]])
# Targets: the XOR of each input pair, one single-element row per sample.
y = np.array([[0],[1],[1],[0]])
# Build the network layer by layer.
model = Sequential()
# First layer: Dense with 8 units, taking 2 input features per sample.
model.add(Dense(8, input_dim=2))
@fheisler
fheisler / q.py
Created Mar 31, 2015
Q-learning Tic-tac-toe
View q.py
import random
class TicTacToe:
def __init__(self, playerX, playerO):
self.board = [' ']*9
self.playerX, self.playerO = playerX, playerO
self.playerX_turn = random.choice([True, False])
def play_game(self):
View UAX_29.py
# We start by loading up PyICU.
import PyICU as icu
# Let's create a test text. Notice it contains some punctuation.
test = u"This is (\"a\") test!"
# We create a wordbreak iterator. All break iterators in ICU are really RuleBasedBreakIterators, and we need to tell it which locale to take the word break rules from. Most locales have the same rules for UAX#29 so we will use English.
wb = icu.BreakIterator.createWordInstance(icu.Locale.getEnglish())
# An iterator is just that. It contains state and then we iterate over it. The state in this case is the text we want to break. So we set that.