Skip to content

Instantly share code, notes, and snippets.

@Luvata
Luvata / pynini_demo.py
Created November 12, 2019 17:56
Simple mapping from raw number to syllable format in Vietnamese using pynini
import pynini
# Build the symbol alphabet: one single-character string for each code point
# in 1-90 and 94-255. Code points 91-93 ("[", "\", "]") are skipped by the
# ranges and supplied in backslash-escaped form instead.
# NOTE(review): presumably escaped because pynini gives square brackets (and
# the backslash) special meaning inside strings - confirm against pynini docs.
chars = [chr(i) for i in range(1, 91)] + [r"\[", r"\\", r"\]"] + [chr(i) for i in range(94, 256)]
# Add the Vietnamese letters (base letters and their diacritic variants) plus
# f, j, z, w, each UTF-8 encoded into a bytes object.
# NOTE(review): these entries are bytes while the ones above are str - confirm
# that pynini.union accepts a mix of both in the installed version.
chars += [bytes(i, "utf8") for i in "aáàạãảăắằặẵẳâấầậẫẩbcdđeéèẹẽẻêếềệễểghiíìịĩỉklmnoóòọõỏôốồộỗổơớờợỡởpqrstuúùụũủưứừựữửvxyýỳỵỹỷfjzw"]
# Drop duplicates. In Python 3 a str never compares equal to a bytes object,
# so e.g. "a" and b"a" both survive; the set also discards the list order.
chars = set(chars)
# Union of every symbol, then closure: intended to accept any sequence of
# symbols (assumes pynini's default closure is the Kleene star - TODO confirm).
sigma_star = pynini.union(*chars).closure()
# Acceptor matching exactly one decimal digit.
numbers = pynini.union("1", "2", "3", "4", "5", "6", "7", "8", "9", "0")
quants_map = pynini.union(
@Luvata
Luvata / eda.ipynb
Created October 28, 2019 10:31
QA_visualize
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
colors = ["#acc2d9", "#56ae57", "#b2996e", "#a8ff04", "#69d84f", "#894585", "#70b23f", "#d4ffff", "#65ab7c", "#952e8f",
"#fcfc81", "#a5a391", "#388004", "#4c9085", "#5e9b8a", "#efb435", "#d99b82", "#0a5f38", "#0c06f7", "#61de2a",
"#3778bf", "#2242c7", "#533cc6", "#9bb53c", "#05ffa6", "#1f6357", "#017374", "#0cb577", "#ff0789", "#afa88b",
"#08787f", "#dd85d7", "#a6c875", "#a7ffb5", "#c2b709", "#e78ea5", "#966ebd", "#ccad60", "#ac86a8", "#947e94",
"#983fb2", "#ff63e9", "#b2fba5", "#63b365", "#8ee53f", "#b7e1a1", "#ff6f52", "#bdf8a3", "#d3b683", "#fffcc4",
"#430541", "#ffb2d0", "#997570", "#ad900d", "#c48efd", "#507b9c", "#7d7103", "#fffd78", "#da467d", "#410200",
"#c9d179", "#fffa86", "#5684ae", "#6b7c85", "#6f6c0a", "#7e4071", "#009337", "#d0e429", "#fff917", "#1d5dec",
"#054907", "#b5ce08", "#8fb67b", "#c8ffb0", "#fdde6c", "#ffdf22", "#a9be70", "#6832e3", "#fdb147", "#c7ac7d",
"#fff39a", "#850e04", "#efc0fe", "#40fd14", "#b6c406",
from logistic_np import LogisticClassifier, normalize_per_pixel, normalize_all_pixel, reshape2D, add_one, test
import numpy as np
def test1_1():
"Normalize per pixel, read code to make sure no loop appear"
X_train = np.arange(12).reshape(3, 2, 2)
X_test = np.arange(20).reshape(5, 2, 2)
mean = X_train.mean(axis=0)
# Fixed palette of 50 hex colour strings ("#RRGGBB"), five per row.
sample_colors = [
    "#FF6633", "#FFB399", "#FF33FF", "#FFFF99", "#00B3E6",
    "#E6B333", "#3366E6", "#999966", "#99FF99", "#B34D4D",
    "#80B300", "#809900", "#E6B3B3", "#6680B3", "#66991A",
    "#FF99E6", "#CCFF1A", "#FF1A66", "#E6331A", "#33FFCC",
    "#66994D", "#B366CC", "#4D8000", "#B33300", "#CC80CC",
    "#66664D", "#991AFF", "#E666FF", "#4DB3FF", "#1AB399",
    "#E666B3", "#33991A", "#CC9999", "#B3B31A", "#00E680",
    "#4D8066", "#809980", "#E6FF80", "#1AFF33", "#999933",
    "#FF3380", "#CCCC00", "#66E64D", "#4D80CC", "#9900B3",
    "#E64D66", "#4DB380", "#FF4D4D", "#99E6E6", "#6666FF",
]
@Luvata
Luvata / tnse.py
Created May 3, 2019 14:10
plot tnse
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
from sklearn import manifold, datasets, decomposition, discriminant_analysis
import pickle
import os
def embedding_plot(X, colors, title):
x_min, x_max = np.min(X, axis=0), np.max(X, axis=0)
X = (X - x_min) / (x_max - x_min)
sealed trait List[+A] // trait: an interface; sealed: every direct subtype must be declared in this same file
// +A makes A covariant: if X is a subtype of Y, then List[X] is a subtype of List[Y]
case object Nil extends List[Nothing] // the empty list
// Nothing is a subtype of every type, so (by covariance) Nil can stand in for a List[A] of any A
case class Cons[+A](head: A, tail: List[A]) extends List[A] // constructor for a non-empty list: first element plus the remaining list
/*
A case class can take arguments, so each instance of that case class can be different
based on the values of its arguments. A case object, on the other hand, does not take args