Skip to content

Instantly share code, notes, and snippets.

@DrDub
Created January 5, 2022 22:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DrDub/42e7c47457bc97db29be2d61087f2981 to your computer and use it in GitHub Desktop.
Save DrDub/42e7c47457bc97db29be2d61087f2981 to your computer and use it in GitHub Desktop.
Jupyter multi-label text classification widget, ideal for creating few-shot learning annotations
# Annotator Widget
# Copyright (C) 2022 Pablo Duboue - Licensed under MIT license
# Define the following variables beforehand:
# classes = list of strings; three-character class names display best
# titles = list of strings, one title per document to be annotated
# texts = list of (list of strings), one list of lines (strings) per document to be annotated
# annotations = [ set() for _ in range(len(texts)) ] # one set of selected class names per document
# current = 0 # index of the document currently being displayed
# fulltext = False # whether to show the full text or only its top/bottom lines
from ipywidgets import widgets
def widget():
    """Build and return the interactive multi-label annotation widget.

    The widget reads and updates these module-level (notebook) variables,
    which must be defined before calling it:

    * ``classes``: list of class-name strings, rendered as checkboxes.
    * ``titles``: list of strings, one title per document.
    * ``texts``: list of lists of strings, the lines of each document.
    * ``annotations``: list of sets, one per document; ticking a checkbox
      adds its class name to the current document's set, unticking
      removes it.
    * ``current``: index of the document being displayed.
    * ``fulltext``: whether to show the whole text or only its first and
      last lines.

    Returns:
        A ``widgets.VBox`` whose children are rebuilt every time the
        document or the display mode changes.
    """
    classes_per_row = 5    # checkboxes placed in each HBox row
    preview_lines = 100    # lines kept at each end in top/bottom mode
    annotator = widgets.VBox()

    def render():
        """Rebuild the header and viewer for the document at ``current``."""
        if not texts:
            # Nothing to index into: show a placeholder instead of raising
            # IndexError (and, later, ZeroDivisionError in ``nxt``).
            annotator.children = (widgets.Label("No documents to annotate"),)
            return

        title = widgets.Label(
            titles[current], layout=widgets.Layout(width="600px")
        )
        pos = widgets.Text(
            description='pos', value=str(current),
            layout=widgets.Layout(width="120px"),
        )
        gobtn = widgets.Button(
            description='go', layout=widgets.Layout(width="40px")
        )

        def go(b):
            """Jump to the document index typed in the ``pos`` box."""
            global current
            try:
                target = int(pos.value)
            except ValueError:
                target = None
            # Accept exactly the indices that list indexing accepts, and
            # store them normalised so ``current`` is never left pointing
            # outside ``texts``. Invalid input keeps the current document;
            # re-rendering resets the ``pos`` box to it.
            if target is not None and -len(texts) <= target < len(texts):
                current = target % len(texts)
            render()

        gobtn.on_click(go)

        nxtbtn = widgets.Button(
            description='next', layout=widgets.Layout(width="60px")
        )

        def nxt(b):
            """Advance to the next document, wrapping around at the end."""
            global current
            current = (current + 1) % len(texts)
            render()

        nxtbtn.on_click(nxt)
        header = widgets.HBox(children=(title, pos, gobtn, nxtbtn))

        vchildren = []
        showtoggle = widgets.Button(
            description="Top/Bottom" if fulltext else "See All Text"
        )

        def toggle(b):
            """Switch between full-text and top/bottom display."""
            global fulltext
            fulltext = not fulltext
            render()

        showtoggle.on_click(toggle)
        vchildren.append(widgets.HBox(children=(
            widgets.Label("Lines: {:,}".format(len(texts[current]))),
            showtoggle,
        )))

        def toggleann(evt):
            """Mirror a checkbox's new state into ``annotations[current]``."""
            label = evt['owner'].description
            if evt['new']:
                annotations[current].add(label)
            else:
                # discard, not remove: the set may have been edited from
                # outside the widget, so the label can already be absent.
                annotations[current].discard(label)

        # Lay the class checkboxes out in rows of ``classes_per_row``.
        for start in range(0, len(classes), classes_per_row):
            boxes = [
                widgets.Checkbox(
                    description=c, value=c in annotations[current],
                    layout=widgets.Layout(width="140px"),
                )
                for c in classes[start:start + classes_per_row]
            ]
            for box in boxes:
                # Subscribe to the 'value' trait only, so the handler is
                # not invoked for unrelated trait changes.
                box.observe(toggleann, names='value')
            vchildren.append(widgets.HBox(children=boxes))

        if fulltext:
            full = widgets.Textarea(value="\n".join(texts[current]), rows=50)
            vchildren.append(full)
        else:
            top = widgets.Textarea(
                value="\n".join(texts[current][:preview_lines]), rows=10
            )
            bottom = widgets.Textarea(
                value="\n".join(texts[current][-preview_lines:]), rows=10
            )
            vchildren.append(widgets.HBox(
                children=(
                    widgets.Label("Top"), top, widgets.Label("Bottom"), bottom
                )
            ))

        viewer = widgets.VBox(children=vchildren)
        annotator.children = (header, viewer)

    render()
    return annotator
# Build the widget. As the last expression of a Jupyter cell, its return
# value (the VBox) is what the notebook renders as the cell output.
widget()
@DrDub
Copy link
Author

DrDub commented Jan 5, 2022

Looks like this:

image

As you annotate, the `annotations` list (one set of class names per document) gets populated and can be used concurrently with the annotation process.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment