Skip to content

Instantly share code, notes, and snippets.

@kvorion
kvorion / Program.cs
Created May 25, 2011 06:49
NetSVMLight: Cross validation for model selection
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using NetSVMLight;
using System.IO;
namespace NetSVMLightConsoleApplication1
{
class Program
@kvorion
kvorion / Program.cs
Created May 9, 2011 18:19
Console application that uses the NetSVMLight library
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using NetSVMLight;
namespace NetSVMLightConsoleApplication1
{
class Program
{
@kvorion
kvorion / comment.py
Created May 8, 2011 09:08
Thank your friends for their birthday wishes on Facebook
import simplejson as json
import urllib2, urllib, string
# Placeholder token values used by this script.
# NOTE(review): presumably Facebook Graph API access tokens (read vs. write scope) that
# must be replaced with real values before running -- confirm against the rest of the script.
read_token = 'abc'  # closing quote restored; the unterminated literal was a SyntaxError
write_token = 'def'
def GetJsonResponse(requestUrl, content = None):
request = urllib2.Request(requestUrl)
response = urllib2.urlopen(request, content)
result = json.load(response)
package FacebookGraph;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
@kvorion
kvorion / gist:916489
Created April 12, 2011 21:43
TwitterNetwork.java
package TwitterGraph;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
@kvorion
kvorion / naivebayes-classify.py
Created December 7, 2010 05:16
classify method
def Classify(self, featureVector): #featureVector is a simple list like the ones that we use to train
    """Return the class label with the highest naive Bayes score for featureVector.

    featureVector lists feature values positionally: the value at position i is
    looked up under the feature name self.featureNameList[i].

    For each label in self.labelCounts the score is
        (labelCounts[label] / sum of all label counts)
        * product over features of (featureCounts[(label, name, value)] / labelCounts[label])
    The product is accumulated as a sum of logarithms and exponentiated once at the end.

    The dictionary of per-label scores is printed before returning.

    Raises IndexError if featureVector has more entries than self.featureNameList,
    and ValueError (from max) if self.labelCounts is empty.

    NOTE(review): the ratios rely on true division; the model module in this listing
    does `from __future__ import division` -- confirm this method lives in that module.
    """
    probabilityPerLabel = {} #store the final probability for each class label
    totalLabelCount = sum(self.labelCounts.values()) #same for every label, so computed once
    for label in self.labelCounts:
        logProb = 0
        # Pair every value with its feature name by position. Resolving the position with
        # featureVector.index(value) only ever finds the FIRST occurrence, so two features
        # that share a value (e.g. both 'yes') were scored against the same feature name.
        for position, featureValue in enumerate(featureVector):
            featureName = self.featureNameList[position]
            logProb += math.log(self.featureCounts[(label, featureName, featureValue)]/self.labelCounts[label])
        probabilityPerLabel[label] = (self.labelCounts[label]/totalLabelCount) * math.exp(logProb)
    print(probabilityPerLabel) #single-argument call form: same output on Python 2, valid on Python 3
    return max(probabilityPerLabel, key = lambda classLabel: probabilityPerLabel[classLabel])
@kvorion
kvorion / naivebayes-trainclassifier.py
Created December 7, 2010 05:09
trainclassifier method
def TrainClassifier(self):
    """Build the label and feature counts from self.featureVectors.

    Each feature vector is a list whose last element is the class label and whose
    earlier elements are feature values in self.featureNameList order.

    Counting pass: for every vector, self.labelCounts[label] is incremented and
    self.featureCounts[(label, featureName, featureValue)] is incremented for each
    non-label position.

    Smoothing pass: every label count is then increased by the number of possible
    values (len(self.features[name])) of each feature except the last name in
    self.featureNameList, which is the label itself.

    Raises IndexError for an empty feature vector or one with more non-label entries
    than self.featureNameList has names.

    NOTE(review): `+= 1` on unseen keys assumes labelCounts and featureCounts are
    defaultdict-like -- confirm in the class constructor.
    """
    for fv in self.featureVectors:
        label = fv[-1] #the last entry of a training vector is its class label
        self.labelCounts[label] += 1 #update count of the label
        for counter, featureValue in enumerate(fv[:-1]):
            self.featureCounts[(label, self.featureNameList[counter], featureValue)] += 1
    #increase label counts (smoothing). remember that the last feature is actually the label
    nonLabelFeatures = self.featureNameList[:-1]
    for label in self.labelCounts:
        for feature in nonLabelFeatures:
            self.labelCounts[label] += len(self.features[feature])
@kvorion
kvorion / naivebayes-getvalues.py
Created December 7, 2010 05:06
getvalues method
def GetValues(self):
    """Read self.trainingFile and fill in the feature definitions and training rows.

    Lines are handled by their first non-blank character:
      * blank lines and lines starting with '%' are skipped, so they no longer end up
        as bogus feature vectors such as [''];
      * lines starting with '@' are header lines. Unless the line contains '@data' or
        starts with '@relation', its second whitespace-separated token is appended to
        self.featureNameList and the comma-separated text between '{' and '}' is stored
        as that feature's value list in self.features;
      * every other line is a data row: lower-cased, split on ',' and appended to
        self.featureVectors.

    NOTE(review): '%' is treated as the ARFF comment marker -- confirm no data row
    legitimately starts with it. Header lines without a '{...}' value list are not
    handled specially.
    """
    # `with` guarantees the file is closed even if a malformed line raises part-way through.
    with open(self.trainingFile, 'r') as arffFile:
        for line in arffFile:
            stripped = line.strip()
            if not stripped or stripped.startswith('%'):
                continue #nothing to parse on blank or comment lines
            if stripped[0] != '@': #start of actual data
                self.featureVectors.append(stripped.lower().split(','))
            else: #feature definitions
                lowered = stripped.lower()
                if '@data' not in lowered and (not lowered.startswith('@relation')):
                    self.featureNameList.append(stripped.split()[1])
                    self.features[self.featureNameList[-1]] = line[line.find('{')+1: line.find('}')].strip().split(',')
@kvorion
kvorion / naivebayes-model.py
Created December 7, 2010 04:52
the model class
from __future__ import division
import collections
import math
class Model:
def __init__(self, arffFile):
self.trainingFile = arffFile
self.features = {} #all feature names and their possible values (including the class label)
self.featureNameList = [] #this is to maintain the order of features as in the arff
self.featureCounts = collections.defaultdict(lambda: 1)#contains tuples of the form (label, feature_name, feature_value)
@kvorion
kvorion / naivebayes.py
Created December 7, 2010 03:34
naive bayes implementation
#Author: Krishnamurthy Koduvayur Viswanathan
from __future__ import division
import collections
import math
class Model:
def __init__(self, arffFile):
self.trainingFile = arffFile
self.features = {} #all feature names and their possible values (including the class label)