Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created February 19, 2021 07:16
Show Gist options
  • Save amankharwal/c8c67b09353e98f483f01511b489ac9e to your computer and use it in GitHub Desktop.
Save amankharwal/c8c67b09353e98f483f01511b489ac9e to your computer and use it in GitHub Desktop.
import re
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
def read_data(file):
    """Read a labelled text corpus into a list of ``[label, text]`` pairs.

    Each non-blank line is expected to look like ``[<label tokens>] <text>``.
    The first character of the line (the opening bracket) is dropped, the
    tokens up to the first ``]`` are whitespace-normalised into the label, and
    everything after that ``]`` becomes the text.

    Args:
        file: Path of the file to read. It is decoded as UTF-8 so the result
            does not depend on the platform's default locale encoding.

    Returns:
        A list of ``[label, text]`` lists, one per usable line, in file order.
        Blank lines are skipped. A line without any ``]`` is kept with an
        empty label and the whole (stripped) line as its text.
    """
    data = []
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no instance; recording them would add
                # empty ['', ''] rows and inflate the instance count.
                continue
            head, bracket, tail = line.partition("]")
            if not bracket:
                # No closing bracket: there is no label to extract, so do not
                # slice an arbitrary chunk of the text into the label slot.
                data.append(['', line])
                continue
            # head[1:] drops the leading "[", split/join collapses the
            # irregular spacing between label tokens to single spaces.
            label = ' '.join(head[1:].split())
            data.append([label, tail.strip()])
    return data
# Path of the labelled text file to load.
file = 'text.txt'
# Parse the file with read_data and keep the resulting list in `data`.
data = read_data(file)
# Report how many entries read_data returned.
print("Number of instances: {}".format(len(data)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment