I am not a man
ฉันไม่ใช่ผู้ชาย
chan/prn mai-chai/adv phu-chai/n
# encoding: UTF-8
# Reads data1.txt as UTF-8 text, passes its entire contents to
# ThaiLang::WordBreaker#break_into_words, and prints the result to stdout.
require 'thailang4r/word_breaker'

# The breaker is built before the input file is opened.
word_breaker = ThaiLang::WordBreaker.new

# The block form of File.open closes the handle once the read finishes and
# hands back the block's value, i.e. the full file contents.
thai_text = File.open("data1.txt", "r:UTF-8", &:read)

puts word_breaker.break_into_words(thai_text)
อ a | |
อิ i | |
อุ u | |
อา ā | |
อี ī | |
อู ū | |
เอ e | |
โอ o |
var getBody = require('raw-body'); | |
// ... | |
app.post("/do_sth_with_json", function(req, res) { | |
getBody(req, { | |
limit: '1mb', | |
length: req.headers['content-length'], | |
encoding: 'utf8' | |
}, function (err, buf) { |
require "nokogiri" | |
require "pp" | |
class EngDix | |
def initialize(monodix_path) | |
@word_hash = {} | |
File.open(monodix_path) do |file| | |
while file.gets | |
line = $_.chomp | |
if line =~ /^\s+<e/ |
package main | |
// Based on https://github.com/dps/go-xml-parse/blob/master/go-xml-parse.go | |
import ( | |
"fmt" | |
"os" | |
"flag" | |
"encoding/xml" | |
"strings" |
{"Li":"mathematics","Gloss":["คณิตศาสตร์"]} | |
{"Li":"calculus","Gloss":["แคลคูลัส","กรวด","หิน"]} | |
{"Li":"a","Gloss":["สัทอักษรสากล"]} | |
{"Li":"car","Gloss":["รถราง"]} | |
{"Li":"nose","Gloss":["จมูก"]} | |
{"Li":"I love you","Gloss":["ฉันรักคุณ"]} | |
{"Li":"poet","Gloss":["กวี"]} | |
{"Li":"eat","Gloss":["กิน","รับประทาน"]} | |
{"Li":"consume","Gloss":["ใช้","กิน","เผลาผลาญ"]} | |
{"Li":"sweet","Gloss":["หวาน","น่ารัก","ยอดเยี่ยม","ขั้นเทพ"]} |
require "nokogiri" | |
require "json" | |
require 'gdbm' | |
class LiPosFromGcideExtractor | |
def parse_each_file(filename) | |
File.open(filename, "r:ISO-8859-1") do |file| | |
chunks = file.read | |
.split(/\n\n/) | |
.select{|chunk| chunk =~ /^[<\[]\w/} |
<?xml version="1.0" encoding="UTF-8"?> | |
<!-- -*- nxml -*- --> | |
<transfer default="chunk"> | |
<section-def-cats> | |
<def-cat n="nom"> | |
<cat-item tags="n.*"/> | |
</def-cat> | |
<def-cat n="adj"> | |
<cat-item tags="adj.*"/> | |
</def-cat> |
require "nokogiri" | |
include Nokogiri::XML | |
def main | |
en_dix_path = File.join(File.dirname(__FILE__), | |
"apertium-dix", | |
"apertium-en-es.en.metadix") |