-
-
Save bwang482/f3d12db44c8e8809164d2f943ad21722 to your computer and use it in GitHub Desktop.
Prodigy hierarchical text classification (testing)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Show or hide the option section with the given element id.
 *
 * Only one of the two sections ("a" / "b") is open at a time: the other
 * section is collapsed via reset() before this one is toggled.
 *
 * @param {string} id - Element id of the section to toggle ("a" or "b").
 */
function toggle(id) {
  var section = document.getElementById(id);
  if (!section) {
    // Nothing to toggle; avoid a TypeError on section.style below.
    return;
  }
  // Collapse the opposite section. reset() also unchecks every checkbox.
  reset(id === "a" ? "b" : "a");
  // The checkboxes were just cleared, so re-sync the task's `selected`
  // list; otherwise a previously pushed selection would linger on the task.
  update();
  section.style.display = section.style.display === "none" ? "block" : "none";
}
/**
 * Hide the section with the given element id and uncheck every element
 * with class "checkbox" in the document (not only those in that section).
 *
 * @param {string} id - Element id of the section to hide.
 */
function reset(id) {
  var section = document.getElementById(id);
  if (section) {
    section.style.display = "none";
  }
  var checkboxes = document.getElementsByClassName("checkbox");
  // Iterate the elements themselves: `for...in` on an HTMLCollection also
  // visits "length", "item" and "namedItem", which are not checkboxes.
  for (const checkbox of checkboxes) {
    checkbox.checked = false;
  }
}
/**
 * Collect the ids of all checked elements with class "checkbox" and pass
 * them to prodigy.update() as the `selected` list.
 */
function update() {
  var checkboxes = document.getElementsByClassName("checkbox");
  // Convert to a real array so only the elements are visited; `for...in`
  // on an HTMLCollection would also visit "length", "item" and "namedItem".
  var results = Array.from(checkboxes)
    .filter(function (checkbox) { return checkbox.checked; })
    .map(function (checkbox) { return checkbox.id; });
  prodigy.update({
    selected: results
  });
}
// When a 'prodigyanswer' event fires, collapse both option sections and
// clear the checkboxes again.
document.addEventListener('prodigyanswer', function () {
  var sectionIds = ["a", "b"];
  sectionIds.forEach(function (sectionId) {
    reset(sectionId);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"a": ["sub-option a1","sub-option a2"], "b": ["sub-option b1","sub-option b2"]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import jinja2 | |
from typing import Union | |
from pathlib import Path | |
import srsly | |
import prodigy | |
from prodigy.util import msg | |
from prodigy import set_hashes, get_stream | |
def load_template(path: Union[str, Path]) -> jinja2.Template:
    """Read a ``.jinja2`` file and compile it into a Jinja2 template.

    Args:
        path: Location of the template file, as a string or ``Path``.

    Returns:
        The compiled template, created with ``jinja2.DebugUndefined`` as
        its ``undefined`` type.

    If the file suffix is not ``.jinja2``, ``msg.fail`` is called with
    ``exits=1``.
    """
    template_path = Path(path)
    if template_path.suffix != ".jinja2":
        msg.fail("Must supply jinja2 file.", exits=1)
    source_text = template_path.read_text(encoding="utf8")
    return jinja2.Template(source_text, undefined=jinja2.DebugUndefined)
@prodigy.recipe(
    "textcat.hierarchical",
    dataset=("The dataset to save to", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    labels=("The label hierarchy as a JSONL file", "positional", None, str),
)
def textcat_hierarchical(
    dataset: str,
    source: str,
    labels: str,
):
    """Annotate texts with a two-level label hierarchy.

    Each example is shown in a "blocks" interface made of a "text" block and
    an "html" block. The HTML is rendered from ``template.jinja2`` using the
    first record of the ``labels`` file, and ``custom.js`` is passed to the
    interface as custom JavaScript. Both files are looked up relative to the
    current working directory.

    Args:
        dataset: Name of the dataset to save annotations to.
        source: Path to the source data as a JSONL file.
        labels: Path to a JSONL file whose first record holds the label
            hierarchy handed to the template as ``options``.

    Returns:
        The recipe components: ``view_id``, ``dataset``, ``stream``,
        ``config`` and ``before_db``.

    If the ``labels`` file contains no records, ``msg.fail`` is called with
    ``exits=1``.
    """
    # Only the first record is used; fail clearly on an empty file instead
    # of raising a bare IndexError.
    options = next(iter(srsly.read_jsonl(labels)), None)
    if options is None:
        msg.fail("The label hierarchy file is empty.", exits=1)

    stream = get_stream(source, rehash=True, dedup=True)
    template = load_template("template.jinja2")
    # The rendered markup depends only on `options`, so render it once
    # rather than once per example.
    rendered_html = template.render(options=options)

    def add_template(stream):
        """Attach the rendered HTML to every incoming example."""
        for ex in stream:
            ex["html"] = rendered_html
            yield set_hashes(ex)

    custom_js = Path("custom.js").read_text(encoding="utf8")

    def before_db(examples):
        """Strip the rendered HTML from examples before they are saved."""
        for ex in examples:
            # pop() tolerates examples that no longer carry the key.
            ex.pop("html", None)
        return examples

    blocks = [
        {"view_id": "text"},
        {"view_id": "html"},
    ]

    return {
        "view_id": "blocks",
        "dataset": dataset,  # Name of dataset to save annotations
        "stream": add_template(stream),  # Incoming stream of examples
        "config": {
            "blocks": blocks,
            "javascript": custom_js,
        },
        "before_db": before_db,
    }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"text":"Uber\u2019s Lesson: Silicon Valley\u2019s Start-Up Machine Needs Fixing","meta":{"source":"The New York Times"}} | |
{"text":"Pearl Automation, Founded by Apple Veterans, Shuts Down","meta":{"source":"The New York Times"}} | |
{"text":"How Silicon Valley Pushed Coding Into American Classrooms","meta":{"source":"The New York Times"}} | |
{"text":"Women in Tech Speak Frankly on Culture of Harassment","meta":{"source":"The New York Times"}} | |
{"text":"Silicon Valley Investors Flexed Their Muscles in Uber Fight","meta":{"source":"The New York Times"}} | |
{"text":"Uber is a Creature of an Industry Struggling to Grow Up","meta":{"source":"The New York Times"}} | |
{"text":"\u2018The Internet Is Broken\u2019: @ev Is Trying to Salvage It","meta":{"source":"The New York Times"}} | |
{"text":"The South Park Commons Fills a Hole in the Tech Landscape","meta":{"source":"The New York Times"}} | |
{"text":"The Closing of the Republican Mind","meta":{"source":"The New York Times"}} | |
{"text":"Writers From the Right and Left on Trump Jr., the Future of the F.B.I., Health Care and More","meta":{"source":"The New York Times"}} | |
{"text":"Daily Report: From Lean to Fat Start-Ups","meta":{"source":"The New York Times"}} | |
{"text":"How Uber\u2019s Chief Is Gaining Even More Clout in the Company","meta":{"source":"The New York Times"}} | |
{"text":"As New Zealand Courts Tech Talent, Isolation Becomes a Draw","meta":{"source":"The New York Times"}} | |
{"text":"One Thing Silicon Valley Can\u2019t Seem to Fix","meta":{"source":"The New York Times"}} | |
{"text":"In Silicon Valley, a Voice of Caution Guides a High-Flying Uber","meta":{"source":"The New York Times"}} | |
{"text":"Silicon Valley Writes a Protest Letter Against Trump","meta":{"source":"The New York Times"}} | |
{"text":"Warriors, Tech\u2019s Team, Are Soaring Out of Reach","meta":{"source":"The New York Times"}} | |
{"text":"Silicon Valley\u2019s Most Elusive Beast","meta":{"source":"The New York Times"}} | |
{"text":"Wall Street and Silicon Valley Form an Uneasy Alliance","meta":{"source":"The New York Times"}} | |
{"text":"Tim O'Reilly Explains the Internet of Things","meta":{"source":"The New York Times"}} | |
{"text":"Alibaba I.P.O. May Unleash Global Fight Over Users","meta":{"source":"The New York Times"}} | |
{"text":"Disruptions: Looking Beyond Silicon Valley's Bubble","meta":{"source":"The New York Times"}} | |
{"text":"Twitter Outages Linked to Glitches and Site Upgrade","meta":{"source":"The New York Times"}} | |
{"text":"Out of the Loop in Silicon Valley","meta":{"source":"The New York Times"}} | |
{"text":"A Determined Outpost of Tiny Technology","meta":{"source":"The New York Times"}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{#- Two collapsible sections, one per top-level option ("a" and "b").
    Each button calls toggle() and each checkbox calls update() on change.
    Label text is escaped explicitly with the `e` filter so that labels
    containing quotes, `<` or `&` cannot break the attributes or markup. -#}
<button onclick="toggle('a')">Option A</button>
<div id="a" style="display: none;">
<form style="display: block;">
{%- for reason in options["a"] -%}
<input type="checkbox" class="checkbox" id="{{ reason|e }}" name="{{ reason|e }}" onchange="update()" style="margin: 0.4rem;"><label for="{{ reason|e }}">{{ reason|e }}</label><br>
{%- endfor -%}
</form>
</div>
<button onclick="toggle('b')">Option B</button>
<div id="b" style="display: none;">
<form style="display: block;">
{%- for reason in options["b"] -%}
<input type="checkbox" class="checkbox" id="{{ reason|e }}" name="{{ reason|e }}" onchange="update()" style="margin: 0.4rem;"><label for="{{ reason|e }}">{{ reason|e }}</label><br>
{%- endfor -%}
</form>
</div>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python -m prodigy textcat.hierarchical news-hier ./data/news_headlines.jsonl ./data/labels.jsonl -F my_recipe2.py