Skip to content

Instantly share code, notes, and snippets.

@zarzen
Last active April 11, 2017 05:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zarzen/51580bc3be06e23379fd5f9154f1ab87 to your computer and use it in GitHub Desktop.
Save zarzen/51580bc3be06e23379fd5f9154f1ab87 to your computer and use it in GitHub Desktop.
Annotate Data
class AnnotatedReview:
    """A review sentence together with its feature and opinion annotations.

    Equality and hashing are based on ``sentence`` alone, so two instances
    with the same sentence but different annotation lists compare equal and
    collapse to one entry in a set or dict.
    """

    def __init__(self, sentence, feature_list, opinion_list):
        """Store the sentence and its two annotation lists unchanged.

        :param sentence: the review sentence text
        :param feature_list: features annotated for the sentence
        :param opinion_list: opinions annotated for the sentence
        """
        self.sentence = sentence
        self.feature_list = feature_list
        self.opinion_list = opinion_list

    def __str__(self):
        """Return the sentence text."""
        return self.sentence

    def __repr__(self):
        """Return the sentence text (same as ``__str__``)."""
        return self.sentence

    def __eq__(self, other):
        """Return True when *other* is an AnnotatedReview with the same sentence."""
        if not isinstance(other, AnnotatedReview):
            # Let Python try the reflected comparison / identity fallback
            # instead of failing on objects without a ``sentence`` attribute.
            return NotImplemented
        # ``sentence`` is an attribute, not a method: compare it directly.
        return self.sentence == other.sentence

    def __hash__(self):
        """Hash on the sentence so that equal reviews hash equally."""
        return hash(self.sentence)
"""
Simple GUI tool for annotating data
"""
from tkinter import *
from pathlib import Path
# Path of the annotation text file: App loads it at start-up when it exists
# and opens it in "w" mode (overwriting it) when the reviews are saved.
txt_data_file = "./annotated_reviews.txt"
class App(Frame):
    """Tkinter form for annotating review sentences one record at a time.

    Records are read from and written to ``txt_data_file``. Each record
    occupies five lines in that file: features, opinions, adjective-noun
    pairs, the sentence, and a separator line of twenty dashes.
    """

    def __init__(self, master=None):
        """Build the widgets, load any existing data and wire up the buttons."""
        super().__init__(master)
        self.grid()
        self.create_widgets()
        self.current_sent_n = 0
        # Always defined, so the button handlers and close() keep working
        # even when no data file was found at start-up.
        self.reviews = []
        self.load_init_data()
        self.bind_listener()

    def create_widgets(self):
        """Create every label, entry, text box and button and place it on the grid."""
        padded = {"padx": 10, "pady": 10, "sticky": W}

        self.Ns_lab = Label(self, text="Features: ")
        self.Op_lab = Label(self, text="Opinions: ")
        self.AN_lab = Label(self, text="Adj-Noun pair")
        self.S_lab = Label(self, text="Sentences: ")
        self.Ns_lab.grid(row=0, column=0, **padded)
        self.Op_lab.grid(row=1, column=0, **padded)
        self.AN_lab.grid(row=2, column=0, **padded)
        self.S_lab.grid(row=3, column=0, **padded)

        self.Ns_entry = Entry(self, width=70)
        self.Ns_entry.grid(row=0, column=1, sticky=W)
        self.Op_entry = Entry(self, width=70)
        self.Op_entry.grid(row=1, column=1, sticky=W)
        self.AN_entry = Entry(self, width=70)
        self.AN_entry.grid(row=2, column=1, sticky=W)

        self.sent_txt = Text(self)
        self.sent_txt.grid(row=3, column=1)

        # Position indicator: 1-based current record number and total count.
        self.current_sent_lab = Label(self)
        self.current_sent_lab.grid(row=4, column=0, **padded)
        self.all_sent_num = Label(self)
        self.all_sent_num.grid(row=4, column=1, **padded)

        self.next_btn = Button(self)
        self.next_btn['text'] = "next sentence"
        self.next_btn.grid(row=5, column=1, **padded)

        self.pre_btn = Button(self)
        self.pre_btn['text'] = "previous"
        self.pre_btn.grid(row=5, column=0, **padded)

        self.save_btn = Button(self)
        self.save_btn['text'] = "Save"
        self.save_btn.grid(row=5, column=2, **padded)

        self.jmp_btn = Button(self)
        self.jmp_btn['text'] = "Jump to"
        self.jmp_btn.grid(row=6, column=0, **padded)

        self.jmp_entry = Entry(self)
        self.jmp_entry.grid(row=6, column=1, sticky=W)

    def bind_listener(self):
        """Attach the left-click handlers to the four buttons."""
        self.next_btn.bind("<Button-1>", self.next_sent)
        self.pre_btn.bind("<Button-1>", self.pre_sent)
        self.save_btn.bind("<Button-1>", self.save_reviews)
        self.jmp_btn.bind("<Button-1>", self.jump_to)

    def load_init_data(self):
        """
        Load the initial data from ``txt_data_file`` when that file exists.

        Sets ``self.txt_data_found`` to record whether the file was present;
        when it is, the records are loaded and the current one is displayed.
        :return: None
        """
        self.txt_data_found = Path(txt_data_file).is_file()
        if self.txt_data_found:
            self.load_txt_data()
            self.display_sent()

    def load_txt_data(self):
        """Read ``txt_data_file`` into ``self.reviews`` as TxtReview objects.

        Only complete five-line records are read; trailing lines that do not
        fill a whole record are ignored.
        """
        self.reviews = []
        # ``with`` closes the file even if reading raises.
        with open(txt_data_file, "r") as reviews_txt:
            lines = reviews_txt.readlines()
        n_reviews = len(lines) // 5
        for i in range(n_reviews):
            base = i * 5
            f = lines[base + 0].strip('\n')
            o = lines[base + 1].strip('\n')
            an = lines[base + 2].strip('\n')
            s = lines[base + 3].strip('\n')
            # lines[base + 4] is the separator line and carries no data.
            self.reviews.append(TxtReview(f, o, an, s))

    def display_sent(self):
        """Show the record at ``self.current_sent_n`` in the form widgets."""
        if not self.reviews:
            # Nothing to show (e.g. the data file held no complete record).
            self.current_sent_lab["text"] = "0"
            self.all_sent_num["text"] = "0"
            return
        r = self.reviews[self.current_sent_n]
        self.current_sent_lab["text"] = str(self.current_sent_n + 1)
        self.all_sent_num["text"] = str(len(self.reviews))
        # clear first
        self.Ns_entry.delete(0, END)
        self.Op_entry.delete(0, END)
        self.AN_entry.delete(0, END)
        self.sent_txt.delete("1.0", END)
        # insert new value
        self.Ns_entry.insert(0, r.features)
        self.Op_entry.insert(0, r.opinions)
        self.AN_entry.insert(0, r.ANpairs)
        self.sent_txt.insert(END, r.sentence)

    def save_current_sent(self):
        """Copy the three entry fields back into the current record.

        The sentence is taken from the stored record rather than from the
        text widget, so edits made in the text box are not kept.
        """
        if not self.reviews:
            return
        f = self.Ns_entry.get()
        o = self.Op_entry.get()
        an = self.AN_entry.get()
        # sent will not change
        s = self.reviews[self.current_sent_n].sentence
        self.reviews[self.current_sent_n] = TxtReview(f, o, an, s)

    def next_sent(self, event):
        """Store the current edits and move to the next record, if there is one."""
        self.save_current_sent()
        if self.current_sent_n + 1 < len(self.reviews):
            self.current_sent_n += 1
            self.display_sent()
        else:
            print("error out of boundary")

    def pre_sent(self, event):
        """Store the current edits and move to the previous record, if there is one."""
        self.save_current_sent()
        if self.current_sent_n > 0:
            self.current_sent_n -= 1
            self.display_sent()
        else:
            print("error out of boundary")

    def save_reviews(self, event):
        """Store the current edits and write every record to ``txt_data_file``.

        :param event: the Tk event from the button binding; unused, and
            ``None`` when called from :meth:`close`
        """
        self.save_current_sent()
        if not self.reviews:
            # No records in memory: leave the file on disk untouched rather
            # than creating or truncating it.
            return
        separator = "-" * 20 + "\n"
        # ``with`` closes (and flushes) the file even if a write raises.
        with open(txt_data_file, "w") as reviews_txt:
            for r in self.reviews:
                # write features
                reviews_txt.write(r.features)
                reviews_txt.write("\n")
                # write opinions
                reviews_txt.write(r.opinions)
                reviews_txt.write("\n")
                # write A-N pairs
                reviews_txt.write(r.ANpairs)
                reviews_txt.write("\n")
                # write sent
                reviews_txt.write(r.sentence)
                reviews_txt.write("\n")
                # write separator
                reviews_txt.write(separator)

    def jump_to(self, event):
        """Jump to the 1-based record number typed into the jump entry."""
        raw = self.jmp_entry.get()
        try:
            sent_n = int(raw)
        except ValueError:
            # Empty or non-numeric input: report it instead of raising
            # inside the Tk callback.
            print("invalid sentence number: %r" % raw)
            return
        if sent_n < 1 or sent_n > len(self.reviews):
            print("out of boundary")
        else:
            self.save_current_sent()
            self.current_sent_n = sent_n - 1
            self.display_sent()

    def close(self):
        """Save all records, then leave the Tk main loop."""
        self.save_reviews(None)
        self.master.quit()
class TxtReview:
    """One annotation record: features, opinions, adjective-noun pairs and sentence."""

    def __init__(self, features, opinions, ANpairs, sentence):
        """Keep the four record fields on the instance exactly as passed in."""
        self.features, self.opinions = features, opinions
        self.ANpairs, self.sentence = ANpairs, sentence
# Create the top-level window, then give it a caption and an initial size.
root = Tk()
root.wm_title("annotation tool")
root.wm_geometry("900x700")

# Attach the annotation form to the window.
app = App(master=root)

# Route the window manager's close request through App.close, which saves
# the reviews before quitting the main loop.
root.wm_protocol("WM_DELETE_WINDOW", app.close)
app.mainloop()
import pickle
import AnnotatedReview
# Convert the pickled reviews into the five-lines-per-record text format:
# features, opinions, (empty) adjective-noun pairs, sentence, separator.

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only run this on a reviews.pickle file from a trusted source.
# ``with`` closes the pickle file, which was previously never closed.
with open("reviews.pickle", "rb") as reviews_file:
    reviews = pickle.load(reviews_file)

# ``with`` closes the output file even if a write raises part-way through.
with open("annotated_reviews.txt", "w") as reviews_txt:
    for r in reviews:
        # write features, each followed by ", "
        reviews_txt.write("".join(f + ", " for f in r.feature_list))
        reviews_txt.write("\n")
        # write opinions, each followed by ", "
        reviews_txt.write("".join(op + ", " for op in r.opinion_list))
        reviews_txt.write("\n")
        # write empty line for A-N pairs
        reviews_txt.write("\n")
        # write sentence
        reviews_txt.write(r.sentence)
        reviews_txt.write("\n")
        # write a separator
        reviews_txt.write("-" * 20 + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment