Skip to content

Instantly share code, notes, and snippets.

View lttzzlll's full-sized avatar

liutaotao lttzzlll

  • http://metasota.ai/
View GitHub Profile
@lttzzlll
lttzzlll / kNN.py
Last active May 2, 2018 14:38
kNN algorithm
# kNN classifier method
# dataSet: the training data set
# inX: the test sample to classify
# labels: the labels corresponding to the samples in the data set
# k: how many of the distance-sorted training samples vote on the class
def classify0(inX, dataSet, labels, k):
sortedDistIndicies = euclideanMetric(dataSet, inX)
classCount = {}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
@lttzzlll
lttzzlll / kNN
Created February 28, 2017 08:17
kNN algorithm
# kNN classifier method
# dataSet: the training data set
# inX: the test sample to classify
# labels: the labels corresponding to the samples in the data set
# k: how many of the distance-sorted training samples vote on the class
def classify0(inX, dataSet, labels, k):
sortedDistIndicies = euclideanMetric(dataSet, inX)
classCount = {}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
'''
Created on Oct 12, 2010
Decision Tree Source Code for Machine Learning in Action Ch. 3
@author: Peter Harrington
'''
from math import log
import operator
# prepare data set
# create the data set and its corresponding labels
import pandas as pd
def run():
data = pd.read_csv('Desktop_Win10Cortana_TRAIN_en-US_Live_2017-04_Desktop_Win10Cortana_Standard_en-us.hyp', sep='\t')
wordCount = 0
wordDict = {}
for index, item in data.iterrows():
wordList = item[8].split(' ')
for word in wordList:
@lttzzlll
lttzzlll / task1.py
Last active August 1, 2017 12:12
task 1
'''
Python Training
https://microsoft.sharepoint.com/teams/stca/ipe/sr/_layouts/15/WopiFrame.aspx?sourcedoc={57bcddd2-341f-4151-84f7-f332fca4d07a}&action=edit&wd=target%28LearningCorner%2Eone%7CDF7F96A9-2186-462F-A0EC-8772881176AA%2FPerl%20and%20C%23%20Training%7C1C6AD0B9-765A-4DD5-A3C7-0822927FC9D1%2F%29
'''
import xml.etree.ElementTree as ET
from operator import itemgetter
import argparse
SRC_FILE_NAME = 'ITA_Blind_R2.xml'
DES_FILE_NAME = 'task1_output.txt'
@lttzzlll
lttzzlll / task2.py
Created August 1, 2017 12:12
task 2
'''
Python Training
https://microsoft.sharepoint.com/teams/stca/ipe/sr/_layouts/15/WopiFrame.aspx?sourcedoc={57bcddd2-341f-4151-84f7-f332fca4d07a}&action=edit&wd=target%28LearningCorner%2Eone%7CDF7F96A9-2186-462F-A0EC-8772881176AA%2FPerl%20and%20C%23%20Training%7C1C6AD0B9-765A-4DD5-A3C7-0822927FC9D1%2F%29
'''
import pandas as pd
from operator import itemgetter
import argparse
SRC_FILE_NAME = 'Desktop_Merino_ThresholdCortana_Train_de-de_Li_1.hyp'
DES_FILE_NAME = 'task2_output.txt'
'''
Python Training
'''
import argparse
import xml.etree.ElementTree as ET
from operator import itemgetter
import codecs
SRC_FILE_NAME = 'CortanaLiveData_ja-JP_2015_Audio_test.xml'
DES_FILE_NAME = 'task3_output.hyp'
'''
Python Training
'''
import argparse
import codecs
import xml.etree.ElementTree as ET
import pandas as pd
INPUT_XML = 'CortanaLiveData_ja-JP_2015_Audio_test_Result.xml'
INPUT_HYP = 'task3_output.hyp'
'''
Python Training
'''
import os
import codecs
import argparse
# import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
'''
Python Training
'''
import os
import codecs
import argparse
# import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup