Created
January 5, 2022 22:26
-
-
Save DrDub/42e7c47457bc97db29be2d61087f2981 to your computer and use it in GitHub Desktop.
Jupyter multi-label text classification widget, ideal for creating few-shot learning annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Annotator Widget
# Copyright (C) 2022 Pablo Duboue - Licensed under MIT license
# Define the following variables before calling widget():
# classes = list of strings; three-character class names display best
# titles = list of strings, one title per document to be annotated
# texts = list of (list of strings), one list of lines (strings) per document to be annotated
# annotations = [ set() for _ in range(len(texts)) ] # one set of selected classes per document
# current = 0 # index of the document currently being displayed
# fulltext = False # whether to show the full text or only the top/bottom lines
from ipywidgets import widgets | |
def widget():
    """Build the multi-label annotation widget for the notebook's documents.

    The widget is driven by module-level (notebook) variables that must be
    defined before this function is called: ``classes``, ``titles``,
    ``texts`` and ``annotations`` are read (the sets inside ``annotations``
    are mutated in place), while ``current`` and ``fulltext`` are also
    rebound by the navigation and view-toggle buttons.

    Returns:
        The ``widgets.VBox`` containing the annotator. Its children are
        rebuilt in place each time the user navigates or toggles the view.
    """
    per_row = 5          # checkboxes laid out per row
    preview_lines = 100  # lines taken from each end in top/bottom mode
    annotator = widgets.VBox()

    def render():
        """Rebuild header, class checkboxes and text view for ``current``."""
        if not texts:
            # Nothing to index into; avoid IndexError / modulo-by-zero below.
            annotator.children = (widgets.Label("No documents to annotate"),)
            return

        # Pin the document this rendering belongs to, so that checkbox events
        # from this view always update the document that is actually shown,
        # even if ``current`` is changed elsewhere in the meantime.
        doc = current
        lines = texts[doc]

        title = widgets.Label(titles[doc], layout=widgets.Layout(width="600px"))
        pos = widgets.Text(description='pos', value=str(doc),
                           layout=widgets.Layout(width="120px"))

        def go(_button):
            """Jump to the index typed in ``pos``; ignore invalid input."""
            global current
            try:
                target = int(pos.value)
            except ValueError:
                target = current
            if 0 <= target < len(texts):
                current = target
            # Re-rendering also resets the text box after a rejected value.
            render()

        def nxt(_button):
            """Advance to the next document, wrapping around at the end."""
            global current
            current = (current + 1) % len(texts)
            render()

        def toggle(_button):
            """Switch between the full text and the top/bottom preview."""
            global fulltext
            fulltext = not fulltext
            render()

        def toggleann(evt):
            """Mirror a checkbox's new value into the document's label set."""
            label = evt['owner'].description
            if evt['new']:
                annotations[doc].add(label)
            else:
                # discard: a label that is already absent is not an error.
                annotations[doc].discard(label)

        gobtn = widgets.Button(description='go', layout=widgets.Layout(width="40px"))
        gobtn.on_click(go)
        nxtbtn = widgets.Button(description='next', layout=widgets.Layout(width="60px"))
        nxtbtn.on_click(nxt)
        header = widgets.HBox(children=(title, pos, gobtn, nxtbtn))

        showtoggle = widgets.Button(
            description="Top/Bottom" if fulltext else "See All Text")
        showtoggle.on_click(toggle)
        vchildren = [
            widgets.HBox(children=(
                widgets.Label(f"Lines: {len(lines):,}"), showtoggle)),
        ]

        # One HBox of checkboxes per group of ``per_row`` classes; observers
        # are attached row by row so every checkbox is covered.
        for start in range(0, len(classes), per_row):
            row = [
                widgets.Checkbox(
                    description=c, value=c in annotations[doc],
                    layout=widgets.Layout(width="140px"),
                )
                for c in classes[start:start + per_row]
            ]
            for box in row:
                # Only react to changes of the checked state.
                box.observe(toggleann, names='value')
            vchildren.append(widgets.HBox(children=row))

        if fulltext:
            full = widgets.Textarea(value="\n".join(lines), rows=50)
            vchildren.append(full)
        else:
            top = widgets.Textarea(
                value="\n".join(lines[:preview_lines]), rows=10)
            bottom = widgets.Textarea(
                value="\n".join(lines[-preview_lines:]), rows=10)
            vchildren.append(widgets.HBox(
                children=(widgets.Label("Top"), top,
                          widgets.Label("Bottom"), bottom)
            ))

        viewer = widgets.VBox(children=vchildren)
        annotator.children = (header, viewer)

    render()
    return annotator
# Build the annotator; as the last expression of a notebook cell, the returned
# VBox is what gets displayed.
widget()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Looks like this:
As you annotate, the `annotations` list (one set of selected classes per document) gets populated and can be used concurrently with the annotation process.