Last active
April 11, 2017 05:27
-
-
Save zarzen/51580bc3be06e23379fd5f9154f1ab87 to your computer and use it in GitHub Desktop.
Annotate Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class AnnotatedReview:
    """A review sentence together with its annotated features and opinions.

    Equality and hashing are both based solely on ``sentence``, so two
    reviews with the same sentence collapse to one entry in a set or dict,
    regardless of their feature/opinion lists.
    """

    def __init__(self, sentence, feature_list, opinion_list):
        """Store the sentence and its two annotation lists unchanged.

        :param sentence: the review sentence (also used for str/repr/hash)
        :param feature_list: features annotated for the sentence
        :param opinion_list: opinions annotated for the sentence
        """
        self.sentence = sentence
        self.feature_list = feature_list
        self.opinion_list = opinion_list

    def __str__(self):
        return self.sentence

    def __repr__(self):
        return self.sentence

    def __eq__(self, other):
        """Return True when *other* is a review with the same sentence."""
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``sentence`` attribute.
        if not isinstance(other, AnnotatedReview):
            return NotImplemented
        # ``sentence`` is a plain attribute, not a method: compare the
        # values directly rather than calling them.
        return self.sentence == other.sentence

    def __hash__(self):
        # Must stay consistent with __eq__, which compares sentences only.
        return hash(self.sentence)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Simple GUI tool for annotating data | |
""" | |
from tkinter import * | |
from pathlib import Path | |
txt_data_file = "./annotated_reviews.txt" | |
class App(Frame):
    """Tkinter frame for browsing and editing annotated review sentences.

    Records are read from and written back to the text file named by the
    module-level ``txt_data_file``. Each record occupies five lines:
    features, opinions, adjective-noun pairs, sentence and a separator.
    """

    def __init__(self, master=None):
        super().__init__(master)
        self.grid()
        self.create_widgets()
        self.current_sent_n = 0
        # Always start with a list so that every handler (including the
        # window-close handler) works even when no data file was found.
        self.reviews = []
        self.load_init_data()
        self.bind_listener()

    def create_widgets(self):
        """Create and lay out all labels, entries and buttons."""
        self.Ns_lab = Label(self, text="Features: ")
        self.Op_lab = Label(self, text="Opinions: ")
        self.AN_lab = Label(self, text="Adj-Noun pair")
        self.S_lab = Label(self, text="Sentences: ")
        self.Ns_lab.grid(row=0, column=0, padx=10, pady=10, sticky=W)
        self.Op_lab.grid(row=1, column=0, padx=10, pady=10, sticky=W)
        self.AN_lab.grid(row=2, column=0, padx=10, pady=10, sticky=W)
        self.S_lab.grid(row=3, column=0, padx=10, pady=10, sticky=W)

        self.Ns_entry = Entry(self, width=70)
        self.Ns_entry.grid(row=0, column=1, sticky=W)
        self.Op_entry = Entry(self, width=70)
        self.Op_entry.grid(row=1, column=1, sticky=W)
        self.AN_entry = Entry(self, width=70)
        self.AN_entry.grid(row=2, column=1, sticky=W)
        self.sent_txt = Text(self)
        self.sent_txt.grid(row=3, column=1)

        # Position indicator: current sentence number and total count.
        self.current_sent_lab = Label(self)
        self.current_sent_lab.grid(row=4, column=0, padx=10, pady=10, sticky=W)
        self.all_sent_num = Label(self)
        self.all_sent_num.grid(row=4, column=1, padx=10, pady=10, sticky=W)

        self.next_btn = Button(self)
        self.next_btn['text'] = "next sentence"
        self.next_btn.grid(row=5, column=1, padx=10, pady=10, sticky=W)
        self.pre_btn = Button(self)
        self.pre_btn['text'] = "previous"
        self.pre_btn.grid(row=5, column=0, padx=10, pady=10, sticky=W)
        self.save_btn = Button(self)
        self.save_btn['text'] = "Save"
        self.save_btn.grid(row=5, column=2, padx=10, pady=10, sticky=W)

        self.jmp_btn = Button(self)
        self.jmp_btn['text'] = "Jump to"
        self.jmp_btn.grid(row=6, column=0, padx=10, pady=10, sticky=W)
        self.jmp_entry = Entry(self)
        self.jmp_entry.grid(row=6, column=1, sticky=W)

    def bind_listener(self):
        """Attach the left-click handlers to the four buttons."""
        self.next_btn.bind("<Button-1>", self.next_sent)
        self.pre_btn.bind("<Button-1>", self.pre_sent)
        self.save_btn.bind("<Button-1>", self.save_reviews)
        self.jmp_btn.bind("<Button-1>", self.jump_to)

    def load_init_data(self):
        """
        Load the initial data from ``txt_data_file`` when that file exists.

        Sets ``self.txt_data_found`` accordingly; when the file is missing
        the review list simply stays empty.
        :return:
        """
        self.txt_data_found = Path(txt_data_file).is_file()
        if self.txt_data_found:
            self.load_txt_data()
            self.display_sent()

    def load_txt_data(self):
        """Parse ``txt_data_file`` into ``self.reviews``.

        Only complete five-line records are read; trailing lines that do
        not form a full record are ignored.
        """
        with open(txt_data_file, "r") as reviews_txt:
            lines = reviews_txt.readlines()

        self.reviews = []
        for i in range(len(lines) // 5):
            # The fifth line of each record is the separator and is skipped.
            f, o, an, s = (
                line.rstrip('\n') for line in lines[i * 5:i * 5 + 4]
            )
            self.reviews.append(TxtReview(f, o, an, s))

    def display_sent(self):
        """Show the current record in the widgets and update the counters."""
        if not self.reviews:
            # Nothing to show (e.g. a data file without a complete record).
            self.current_sent_lab["text"] = "0"
            self.all_sent_num["text"] = "0"
            return

        r = self.reviews[self.current_sent_n]
        self.current_sent_lab["text"] = str(self.current_sent_n + 1)
        self.all_sent_num["text"] = str(len(self.reviews))
        # clear first
        self.Ns_entry.delete(0, END)
        self.Op_entry.delete(0, END)
        self.AN_entry.delete(0, END)
        self.sent_txt.delete("1.0", END)
        # insert new value
        self.Ns_entry.insert(0, r.features)
        self.Op_entry.insert(0, r.opinions)
        self.AN_entry.insert(0, r.ANpairs)
        self.sent_txt.insert(END, r.sentence)

    def save_current_sent(self):
        """Copy the entry fields back into the current record (in memory)."""
        if not self.reviews:
            return
        f = self.Ns_entry.get()
        o = self.Op_entry.get()
        an = self.AN_entry.get()
        # The sentence is not editable: keep the stored text rather than
        # reading it back from the Text widget.
        s = self.reviews[self.current_sent_n].sentence
        self.reviews[self.current_sent_n] = TxtReview(f, o, an, s)

    def next_sent(self, event):
        """Store the current edits and move to the next record, if any."""
        self.save_current_sent()
        if self.current_sent_n + 1 < len(self.reviews):
            self.current_sent_n += 1
            self.display_sent()
        else:
            print("error out of boundary")

    def pre_sent(self, event):
        """Store the current edits and move to the previous record, if any."""
        self.save_current_sent()
        if self.current_sent_n > 0:
            self.current_sent_n -= 1
            self.display_sent()
        else:
            print("error out of boundary")

    def save_reviews(self, event):
        """Write every record back to ``txt_data_file``."""
        self.save_current_sent()
        if not self.reviews:
            # Nothing was loaded: do not create or truncate the data file.
            return
        separator = "-" * 20
        with open(txt_data_file, "w") as reviews_txt:
            for r in self.reviews:
                # features, opinions, A-N pairs, sentence, separator
                record = (r.features, r.opinions, r.ANpairs, r.sentence,
                          separator)
                reviews_txt.write("\n".join(record) + "\n")

    def jump_to(self, event):
        """Jump to the 1-based record number typed into the jump entry."""
        try:
            sent_n = int(self.jmp_entry.get())
        except ValueError:
            # Empty or non-numeric input: report it instead of raising
            # inside the Tk callback.
            print("invalid sentence number")
            return

        if sent_n < 1 or sent_n > len(self.reviews):
            print("out of boundary")
        else:
            self.save_current_sent()
            self.current_sent_n = sent_n - 1
            self.display_sent()

    def close(self):
        """Save all records, then stop the Tk main loop."""
        self.save_reviews(None)
        self.master.quit()
class TxtReview:
    """One record of the annotation text file, with every field kept as given."""

    def __init__(self, features, opinions, ANpairs, sentence):
        """Store the four fields of a record without any conversion."""
        self.features, self.opinions = features, opinions
        self.ANpairs, self.sentence = ANpairs, sentence
# Build the main window, then hand control to the Tk event loop.
root = Tk()
root.geometry("900x700")
root.title("annotation tool")
app = App(root)
# Route the window-manager close request through App.close.
root.protocol("WM_DELETE_WINDOW", app.close)
app.mainloop()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import AnnotatedReview | |
# Convert the pickled reviews into the plain-text format: five lines per
# review (features, opinions, empty A-N pairs line, sentence, separator).
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# run this on a reviews.pickle file from a trusted source.
with open("reviews.pickle", "rb") as reviews_file:
    reviews = pickle.load(reviews_file)

# The context manager closes the output file even if a write fails.
with open("annotated_reviews.txt", "w") as reviews_txt:
    for r in reviews:
        # write features; every item is followed by ", " (the last one too)
        reviews_txt.write("".join(f + ", " for f in r.feature_list) + "\n")
        # write opinions, same layout as the features line
        reviews_txt.write("".join(op + ", " for op in r.opinion_list) + "\n")
        # write empty line for A-N pairs
        reviews_txt.write("\n")
        # write sentence
        reviews_txt.write(r.sentence + "\n")
        # write a separator
        reviews_txt.write("-" * 20 + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment