Skip to content

Instantly share code, notes, and snippets.

@bwang482
Created June 28, 2023 21:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bwang482/f3d12db44c8e8809164d2f943ad21722 to your computer and use it in GitHub Desktop.
Save bwang482/f3d12db44c8e8809164d2f943ad21722 to your computer and use it in GitHub Desktop.
Prodigy hierarchical text classification (testing)
/**
 * Show or hide the sub-option panel with the given element id.
 *
 * The other panel ("b" when id is "a", otherwise "a") is collapsed first,
 * which also unchecks every checkbox on the page. The stored selection is
 * then re-synchronised with the (now cleared) checkboxes.
 *
 * @param {string} id - Element id of the panel to toggle.
 */
function toggle(id) {
  var panel = document.getElementById(id);

  // Only one panel may be open at a time: collapse the sibling.
  reset(id == "a" ? "b" : "a");

  panel.style.display = panel.style.display === "none" ? "block" : "none";

  // reset() unchecks the boxes programmatically, which fires no "change"
  // event. Without this call the task would keep the previously selected
  // sub-options even though nothing is ticked any more.
  update();
}
/**
 * Hide the panel with the given element id and uncheck every element that
 * carries the "checkbox" class (in all panels, not just the hidden one).
 *
 * @param {string} id - Element id of the panel to hide.
 */
function reset(id) {
  var panel = document.getElementById(id);
  // The panel may not be in the DOM (e.g. no task rendered); skip hiding
  // instead of throwing so the checkboxes below are still cleared.
  if (panel) {
    panel.style.display = "none";
  }

  // Iterate the elements themselves: `for...in` on an HTMLCollection also
  // visits "length", "item" and "namedItem", which are not checkboxes.
  for (const box of document.getElementsByClassName("checkbox")) {
    box.checked = false;
  }
}
/**
 * Collect the ids of all checked elements with the "checkbox" class and
 * store them on the current task as `selected` via `prodigy.update`.
 */
function update() {
  // Array.from yields only the elements; `for...in` on an HTMLCollection
  // would also visit "length", "item" and "namedItem".
  var results = Array.from(document.getElementsByClassName("checkbox"))
    .filter(function (box) { return box.checked; })
    .map(function (box) { return box.id; });

  prodigy.update({
    selected: results
  });
}
// Whenever a 'prodigyanswer' event is dispatched on the document, collapse
// both panels (reset also unchecks every checkbox).
document.addEventListener('prodigyanswer', function () {
  ["a", "b"].forEach(function (panelId) {
    reset(panelId);
  });
});
{"a": ["sub-option a1","sub-option a2"], "b": ["sub-option b1","sub-option b2"]}
import jinja2
from typing import Union
from pathlib import Path
import srsly
import prodigy
from prodigy.util import msg
from prodigy import set_hashes, get_stream
def load_template(path: Union[str, Path]) -> jinja2.Template:
    """Read a ``.jinja2`` file and compile it into a Jinja2 template.

    Args:
        path: Location of the template file, as a string or ``Path``.

    Returns:
        The compiled template, created with ``jinja2.DebugUndefined`` as its
        handler for undefined variables.

    If the file suffix is not ``.jinja2`` the problem is reported through
    ``msg.fail`` with ``exits=1``.
    """
    template_path = path if isinstance(path, Path) else Path(path)

    if template_path.suffix != ".jinja2":
        msg.fail("Must supply jinja2 file.", exits=1)

    source = template_path.read_text(encoding="utf8")
    return jinja2.Template(source, undefined=jinja2.DebugUndefined)
@prodigy.recipe(
    "textcat.hierarchical",
    dataset=("The dataset to save to", "positional", None, str),
    source=("The source data as a JSONL file", "positional", None, str),
    labels=("The label hierarchy as a JSONL file", "positional", None, str),
)
def textcat_hierarchical(
    dataset: str,
    source: str,
    labels: str,
):
    """Annotate texts with a two-level label hierarchy.

    Each example is shown in a ``blocks`` interface made of a ``text`` block
    and an ``html`` block. The HTML is rendered from ``template.jinja2`` using
    the first record of the labels file as ``options``; ``custom.js`` is passed
    to the app as custom JavaScript. Both files are resolved relative to the
    current working directory.

    Args:
        dataset: Name of the dataset to save annotations to.
        source: Path to the JSONL file with the examples to annotate.
        labels: Path to a JSONL file whose first record holds the label
            hierarchy; any further records are ignored.

    Returns:
        The recipe components: ``view_id``, ``dataset``, ``stream``,
        ``config`` and a ``before_db`` callback.

    If the labels file contains no records the problem is reported through
    ``msg.fail`` with ``exits=1``.
    """
    label_rows = list(srsly.read_jsonl(labels))
    if not label_rows:
        # Fail with a readable message instead of a bare IndexError.
        msg.fail(f"No label hierarchy found in '{labels}'.", exits=1)
    options = label_rows[0]

    stream = get_stream(source, rehash=True, dedup=True)
    template = load_template("template.jinja2")
    # The markup depends only on the label hierarchy, never on the example,
    # so render it once instead of once per example.
    rendered_html = template.render(options=options)

    def add_template(stream):
        """Attach the rendered HTML to every example in the stream."""
        for ex in stream:
            ex["html"] = rendered_html
            yield set_hashes(ex)

    # Explicit encoding, consistent with how the template file is read.
    custom_js = Path("custom.js").read_text(encoding="utf8")

    def before_db(examples):
        """Drop the rendered HTML from the examples and return them."""
        for ex in examples:
            # pop() tolerates examples that carry no "html" key.
            ex.pop("html", None)
        # Debug output of what is handed back for saving.
        print(examples)
        return examples

    blocks = [
        {"view_id": "text"},
        {"view_id": "html"},
    ]

    return {
        "view_id": "blocks",
        "dataset": dataset,  # Name of dataset to save annotations
        "stream": add_template(stream),  # Incoming stream of examples
        "config": {
            "blocks": blocks,
            "javascript": custom_js,
        },
        "before_db": before_db,
    }
{"text":"Uber\u2019s Lesson: Silicon Valley\u2019s Start-Up Machine Needs Fixing","meta":{"source":"The New York Times"}}
{"text":"Pearl Automation, Founded by Apple Veterans, Shuts Down","meta":{"source":"The New York Times"}}
{"text":"How Silicon Valley Pushed Coding Into American Classrooms","meta":{"source":"The New York Times"}}
{"text":"Women in Tech Speak Frankly on Culture of Harassment","meta":{"source":"The New York Times"}}
{"text":"Silicon Valley Investors Flexed Their Muscles in Uber Fight","meta":{"source":"The New York Times"}}
{"text":"Uber is a Creature of an Industry Struggling to Grow Up","meta":{"source":"The New York Times"}}
{"text":"\u2018The Internet Is Broken\u2019: @ev Is Trying to Salvage It","meta":{"source":"The New York Times"}}
{"text":"The South Park Commons Fills a Hole in the Tech Landscape","meta":{"source":"The New York Times"}}
{"text":"The Closing of the Republican Mind","meta":{"source":"The New York Times"}}
{"text":"Writers From the Right and Left on Trump Jr., the Future of the F.B.I., Health Care and More","meta":{"source":"The New York Times"}}
{"text":"Daily Report: From Lean to Fat Start-Ups","meta":{"source":"The New York Times"}}
{"text":"How Uber\u2019s Chief Is Gaining Even More Clout in the Company","meta":{"source":"The New York Times"}}
{"text":"As New Zealand Courts Tech Talent, Isolation Becomes a Draw","meta":{"source":"The New York Times"}}
{"text":"One Thing Silicon Valley Can\u2019t Seem to Fix","meta":{"source":"The New York Times"}}
{"text":"In Silicon Valley, a Voice of Caution Guides a High-Flying Uber","meta":{"source":"The New York Times"}}
{"text":"Silicon Valley Writes a Protest Letter Against Trump","meta":{"source":"The New York Times"}}
{"text":"Warriors, Tech\u2019s Team, Are Soaring Out of Reach","meta":{"source":"The New York Times"}}
{"text":"Silicon Valley\u2019s Most Elusive Beast","meta":{"source":"The New York Times"}}
{"text":"Wall Street and Silicon Valley Form an Uneasy Alliance","meta":{"source":"The New York Times"}}
{"text":"Tim O'Reilly Explains the Internet of Things","meta":{"source":"The New York Times"}}
{"text":"Alibaba I.P.O. May Unleash Global Fight Over Users","meta":{"source":"The New York Times"}}
{"text":"Disruptions: Looking Beyond Silicon Valley's Bubble","meta":{"source":"The New York Times"}}
{"text":"Twitter Outages Linked to Glitches and Site Upgrade","meta":{"source":"The New York Times"}}
{"text":"Out of the Loop in Silicon Valley","meta":{"source":"The New York Times"}}
{"text":"A Determined Outpost of Tiny Technology","meta":{"source":"The New York Times"}}
{#- Two collapsible panels ("a" and "b"), each listing one checkbox per entry
    of options["a"] / options["b"]. The buttons call toggle() and the
    checkboxes call update(), both expected from the page's custom JavaScript.
    Every label is passed through the `e` filter so that characters such as
    quotes, < or & cannot break the attributes or the surrounding markup. -#}
<button onclick="toggle('a')">Option A</button>
<div id="a" style="display: none;">
<form style="display: block;">
{%- for reason in options["a"] -%}
<input type="checkbox" class="checkbox" id="{{ reason | e }}" name="{{ reason | e }}" onchange="update()" style="margin: 0.4rem;"><label for="{{ reason | e }}">{{ reason | e }}</label><br>
{%- endfor -%}
</form>
</div>
<button onclick="toggle('b')">Option B</button>
<div id="b" style="display: none;">
<form style="display: block;">
{%- for reason in options["b"] -%}
<input type="checkbox" class="checkbox" id="{{ reason | e }}" name="{{ reason | e }}" onchange="update()" style="margin: 0.4rem;"><label for="{{ reason | e }}">{{ reason | e }}</label><br>
{%- endfor -%}
</form>
</div>
@bwang482
Copy link
Author

python -m prodigy textcat.hierarchical news-hier ./data/news_headlines.jsonl ./data/labels.jsonl -F my_recipe2.py

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment