Wannaphong Phatthiyaphaibun wannaphong

## ngrams.py
# -*- coding: utf-8 -*-
"""Print most frequent N-grams in given file.

Usage: python ngrams.py filename

Problem description: Build a tool which receives a corpus of text,
analyses it and reports the top 10 most frequent bigrams, trigrams,
four-grams (i.e. most frequently occurring two, three and four word
consecutive combinations).
"""

## test.py
>>> import spacy
>>> th_nlp = spacy.load('th')
>>> text="คุณรักผมไหม"
>>> a= th_nlp(text)
>>> a
คุณรักผมไหม
>>> list(a)
[คุณ, รัก, ผม, ไหม]

## dict.txt
คน
เล่น
แกม
ตา
จน

## thai_spacy2.py
>>> from spacy.lang.th import Thai
>>> nlp = Thai()
>>> text="คุณรักผมไหม"
>>> a = nlp(text)
>>> a
คุณรักผมไหม
>>> list(a)
[คุณ, รัก, ผม, ไหม]

## face_recognition_test.py
import face_recognition
# Compare the first face found in a reference photo against the first face
# found in an unknown photo and print whether they match.

# Load both images from disk.
picture_of_Steve_Jobs = face_recognition.load_image_file("Steve_Jobs_Headshot_2010-CROP.jpg")  # reference image
unknown_picture = face_recognition.load_image_file("0x600.jpg")  # image to check

# Encode the faces.  Per the face_recognition documentation, face_encodings()
# returns one encoding per detected face, so the list is empty when no face is
# found; guard against that instead of failing with a bare IndexError on [0].
known_encodings = face_recognition.face_encodings(picture_of_Steve_Jobs)
unknown_encodings = face_recognition.face_encodings(unknown_picture)
if not known_encodings or not unknown_encodings:
    raise SystemExit("No face found in one of the images; cannot compare.")

face_encoding = known_encodings[0]  # encoding of the reference face
unknown_face_encoding = unknown_encodings[0]  # encoding of the face to check

# compare_faces() yields one result per known encoding; only one was supplied.
results = face_recognition.compare_faces([face_encoding], unknown_face_encoding)
if results[0]:
    print("It's a picture of Steve Jobs!")
else:
    print("It's not a Steve Jobs!")

## thaiwordnet.py
from nltk.corpus import wordnet
class thaiwordnet:
  """Wrapper around the imported ``wordnet`` object that defaults ``lang`` to ``"tha"``.

  Every method forwards its arguments to the wrapped object and returns
  whatever that call returns.
  """

  def __init__(self):
    # Keep a handle on the module-level ``wordnet`` that all methods delegate to.
    self._wordnet = wordnet

  def synsets(self, word, pos=None, lang="tha"):
    """Return ``wordnet.synsets`` for ``word``.

    ``word`` is passed as the ``lemma`` keyword; ``pos`` and ``lang`` are
    forwarded unchanged.
    """
    query = {"lemma": word, "pos": pos, "lang": lang}
    return self._wordnet.synsets(**query)

  def synset(self, name_synsets):
    """Return ``wordnet.synset`` for the given synset name."""
    backend = self._wordnet
    return backend.synset(name_synsets)

  def all_lemma_names(self, pos=None, lang="tha"):
    """Return ``wordnet.all_lemma_names`` for ``pos`` and ``lang``."""
    filters = {"pos": pos, "lang": lang}
    return self._wordnet.all_lemma_names(**filters)

## gist:e46ffd5d91ab436a44e40456865a9ee8
<!-- Share area for a post: two Facebook plugin iframes followed by the
     template's 'shareButtons' includable. -->
<div class='post-share-buttons'>
<!-- Facebook like.php plugin; the src is built by the expr:src expression
     from data:post.canonicalUrl plus the box_count layout parameters. -->
<iframe allowTransparency='true' expr:src='&quot;https://www.facebook.com/plugins/like.php?href=&quot; + data:post.canonicalUrl + &quot;&amp;layout=box_count&amp;show_faces=false&amp;width=100&amp;action=like&amp;font=arial&amp;colorscheme=light&quot;' frameborder='0' scrolling='no' style='border:none; overflow:hidden; width:55px; height:62px;'/>
<!-- Facebook share_button.php plugin, built the same way from the same URL. -->
<iframe allowTransparency='true' expr:src='&quot;https://www.facebook.com/plugins/share_button.php?href=&quot; + data:post.canonicalUrl + &quot;&amp;layout=box_count&amp;show_faces=false&amp;width=100&amp;action=like&amp;font=arial&amp;colorscheme=light&quot;' frameborder='0' scrolling='no' style='border:none; overflow:hidden; width:55px; height:62px;'/>
<br />
<!-- Includes the 'shareButtons' includable with the current post as its data. -->
<b:include data='post' name='shareButtons'/>
</div>

## provider_paths.xml
<!-- Declares a support-library FileProvider whose authority is
     "${applicationId}.provider". It is not exported and grants URI permissions.
     NOTE(review): this looks like the manifest <provider> entry rather than the
     res/xml/provider_paths.xml resource it references; confirm the file name. -->
<provider
            android:name="android.support.v4.content.FileProvider"
            android:authorities="${applicationId}.provider"
            android:exported="false"
            android:grantUriPermissions="true">
            <!-- Points the provider at the @xml/provider_paths resource. -->
            <meta-data
                android:name="android.support.FILE_PROVIDER_PATHS"
                android:resource="@xml/provider_paths"/>
        </provider>

## rake_thai.py
# -*- coding: utf-8 -*-
"""Implementation of Rapid Automatic Keyword Extraction algorithm.

As described in the paper `Automatic keyword extraction from individual
documents` by Stuart Rose, Dave Engel, Nick Cramer and Wendy Cowley.

Thai language by Mr.Wannaphong Phatthiyaphaibun <wannaphong@kkumail.com>
"""

import string

## icu_word_segmentation.java
// Credit: original version from http://vuthi.blogspot.com.au/2004/08/java.html
public String icu_word_segmentation(String txt){
        Locale thaiLocale = new Locale("th");
        BreakIterator boundary = BreakIterator.getWordInstance(thaiLocale);
        boundary.setText(txt);
        StringBuffer strout = new StringBuffer();
        int start = boundary.first();
        for (int end = boundary.next();
            end != BreakIterator.DONE;
            start = end, end = boundary.next()) {
	# -*- coding: utf-8 -*-
	"""Print most frequent N-grams in given file.

	Usage: python ngrams.py filename

	Problem description: Build a tool which receives a corpus of text,
	analyses it and reports the top 10 most frequent bigrams, trigrams,
	four-grams (i.e. most frequently occurring two, three and four word
	consecutive combinations).
	"""
	>>> import spacy
	>>> th_nlp = spacy.load('th')
	>>> text="คุณรักผมไหม"
	>>> a= th_nlp(text)
	>>> a
	คุณรักผมไหม
	>>> list(a)
	[คุณ, รัก, ผม, ไหม]
	>>> from spacy.lang.th import Thai
	>>> nlp = Thai()
	>>> text="คุณรักผมไหม"
	>>> a = nlp(text)
	>>> a
	คุณรักผมไหม
	>>> list(a)
	[คุณ, รัก, ผม, ไหม]
	import face_recognition
	# Compare the first face found in a reference photo against the first face
	# found in an unknown photo and print whether they match.

	# Load both images from disk.
	picture_of_Steve_Jobs = face_recognition.load_image_file("Steve_Jobs_Headshot_2010-CROP.jpg")  # reference image
	unknown_picture = face_recognition.load_image_file("0x600.jpg")  # image to check

	# Encode the faces.  Per the face_recognition documentation, face_encodings()
	# returns one encoding per detected face, so the list is empty when no face is
	# found; guard against that instead of failing with a bare IndexError on [0].
	known_encodings = face_recognition.face_encodings(picture_of_Steve_Jobs)
	unknown_encodings = face_recognition.face_encodings(unknown_picture)
	if not known_encodings or not unknown_encodings:
	    raise SystemExit("No face found in one of the images; cannot compare.")

	face_encoding = known_encodings[0]  # encoding of the reference face
	unknown_face_encoding = unknown_encodings[0]  # encoding of the face to check

	# compare_faces() yields one result per known encoding; only one was supplied.
	results = face_recognition.compare_faces([face_encoding], unknown_face_encoding)
	# The if/else bodies had lost their indentation; restored here.
	if results[0]:
	    print("It's a picture of Steve Jobs!")
	else:
	    print("It's not a Steve Jobs!")
	from nltk.corpus import wordnet
	class thaiwordnet:
	    """Wrapper around the imported ``wordnet`` object that defaults ``lang`` to ``"tha"``.

	    Every method forwards its arguments to the wrapped object and returns
	    whatever that call returns.  (The class and method bodies had lost
	    their indentation; the structure is restored here.)
	    """

	    def __init__(self):
	        # Keep a handle on the module-level ``wordnet`` used by all methods.
	        self._wordnet = wordnet

	    def synsets(self, word, pos=None, lang="tha"):
	        """Return ``wordnet.synsets`` for ``word`` (passed as ``lemma``)."""
	        return self._wordnet.synsets(lemma=word, pos=pos, lang=lang)

	    def synset(self, name_synsets):
	        """Return ``wordnet.synset`` for the given synset name."""
	        return self._wordnet.synset(name_synsets)

	    def all_lemma_names(self, pos=None, lang="tha"):
	        """Return ``wordnet.all_lemma_names`` for ``pos`` and ``lang``."""
	        return self._wordnet.all_lemma_names(pos=pos, lang=lang)
	<!-- Share area for a post: two Facebook plugin iframes followed by the
	     template's 'shareButtons' includable.  The quotes and ampersands inside
	     expr:src are written as &quot; / &amp; entities so the markup stays
	     well-formed XML. -->
	<div class='post-share-buttons'>
	<!-- Facebook like.php plugin; src is built from data:post.canonicalUrl. -->
	<iframe allowTransparency='true' expr:src='&quot;https://www.facebook.com/plugins/like.php?href=&quot; + data:post.canonicalUrl + &quot;&amp;layout=box_count&amp;show_faces=false&amp;width=100&amp;action=like&amp;font=arial&amp;colorscheme=light&quot;' frameborder='0' scrolling='no' style='border:none; overflow:hidden; width:55px; height:62px;'/>
	<!-- Facebook share_button.php plugin, built the same way from the same URL. -->
	<iframe allowTransparency='true' expr:src='&quot;https://www.facebook.com/plugins/share_button.php?href=&quot; + data:post.canonicalUrl + &quot;&amp;layout=box_count&amp;show_faces=false&amp;width=100&amp;action=like&amp;font=arial&amp;colorscheme=light&quot;' frameborder='0' scrolling='no' style='border:none; overflow:hidden; width:55px; height:62px;'/>
	<br />
	<!-- Includes the 'shareButtons' includable with the current post as its data. -->
	<b:include data='post' name='shareButtons'/>
	</div>
	<!-- Declares a support-library FileProvider whose authority is
	     "${applicationId}.provider". It is not exported and grants URI permissions. -->
	<provider
	android:name="android.support.v4.content.FileProvider"
	android:authorities="${applicationId}.provider"
	android:exported="false"
	android:grantUriPermissions="true">
	<!-- Points the provider at the @xml/provider_paths resource. -->
	<meta-data
	android:name="android.support.FILE_PROVIDER_PATHS"
	android:resource="@xml/provider_paths"/>
	</provider>
	# -*- coding: utf-8 -*-
	"""Implementation of Rapid Automatic Keyword Extraction algorithm.

	As described in the paper `Automatic keyword extraction from individual
	documents` by Stuart Rose, Dave Engel, Nick Cramer and Wendy Cowley.

	Thai language by Mr.Wannaphong Phatthiyaphaibun <wannaphong@kkumail.com>
	"""

	import string
	// Credit: original version from http://vuthi.blogspot.com.au/2004/08/java.html
	public String icu_word_segmentation(String txt){
	Locale thaiLocale = new Locale("th");
	BreakIterator boundary = BreakIterator.getWordInstance(thaiLocale);
	boundary.setText(txt);
	StringBuffer strout = new StringBuffer();
	int start = boundary.first();
	for (int end = boundary.next();
	end != BreakIterator.DONE;
	start = end, end = boundary.next()) {