Created
March 2, 2022 08:58
-
-
Save ModMaamari/59c846fb93f3efaf2415d4e883e5d254 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def from_label_studio_to_dataframe(LABEL_STUDIO_URL="",
                                   API_KEY="",
                                   PROJECT_ID="1"):
    """Load labeled text spans from a Label Studio project into a DataFrame.

    Connects to Label Studio, fetches the labeled tasks of one project and
    flattens the first annotation of every task into one row per labeled
    span. Only spans tagged with one of the question/answer stage labels
    listed in ``tag2val`` are kept; everything else is skipped:

    - tasks whose ``annotations`` list is missing or empty,
    - result entries without ``value["labels"]`` or ``value["text"]``,
    - labels that are not stage tags (e.g. a bare question id ``QID_13``).

    Args:
        LABEL_STUDIO_URL (str): URL of the Label Studio instance.
        API_KEY (str): Label Studio API key.
        PROJECT_ID (str): id of the project to read. Defaults to ``"1"``,
            the value that used to be hard-coded.

    Returns:
        pandas.DataFrame: columns ``text`` (the labeled span), ``category``
        (``"Question"`` or ``"Answer"``), ``stage`` (int taken from the tag
        name) and ``level`` (int looked up in ``tag2val``). The frame is
        empty, with the same columns, when nothing matches.
    """
    ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)
    ls.check_connection()
    pro = project.Project.get_from_id(ls, PROJECT_ID)
    raw_data = pro.get_labeled_tasks()

    # Integer level assigned to each recognised "<Category>_<stage>_<name>"
    # tag. Membership in this mapping is also what decides whether a label
    # is kept at all.
    tag2val = {
        "Question_1_Company_specific": 0,
        "Question_1_Market_related": 1,
        "Question_2_specific": 0,
        "Question_2_open": 1,
        "Question_3_attack": 2,
        "Question_3_support": 0,
        "Question_3_neutral": 1,
        "Answer_1_specific": 0,
        "Answer_1_avoid_excuse": 1,
        "Answer_2_negative": 0,
        "Answer_2_positive": 1,
        "Answer_3_blame": 0,
        "Answer_3_no_blame": 1,
    }

    # Collect plain dicts and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # frame on every call.
    rows = []
    for task in tqdm(raw_data):
        annotations = task.get("annotations") or []
        if not annotations:
            continue
        # Only the first annotation of each task is used.
        for ann in annotations[0].get("result") or []:
            value = ann.get("value") or {}
            labels = value.get("labels")
            if not labels or "text" not in value:
                # Not a labeled text span; nothing to extract.
                continue
            label = labels[0]
            # Ignore any observation that is not one of the question/answer
            # stage tags (ex: an observation that only labels QID_13).
            if label not in tag2val:
                continue
            # Tags look like "<Category>_<stage>_<name>".
            category, stage = label.split("_", 2)[:2]
            rows.append(
                {
                    "text": value["text"],
                    "category": category,
                    "stage": int(stage),
                    "level": tag2val[label],
                }
            )

    return pd.DataFrame(rows, columns=["text", "category", "stage", "level"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment