Skip to content

Instantly share code, notes, and snippets.

@ModMaamari
Created March 2, 2022 08:58
Show Gist options
  • Save ModMaamari/59c846fb93f3efaf2415d4e883e5d254 to your computer and use it in GitHub Desktop.
Save ModMaamari/59c846fb93f3efaf2415d4e883e5d254 to your computer and use it in GitHub Desktop.
# Maps every recognised Label Studio tag to its numeric level within its stage.
# Tags follow the pattern "<Question|Answer>_<stage>_<description>".
_TAG_TO_LEVEL = {
    "Question_1_Company_specific": 0,
    "Question_1_Market_related": 1,
    "Question_2_specific": 0,
    "Question_2_open": 1,
    "Question_3_attack": 2,
    "Question_3_support": 0,
    "Question_3_neutral": 1,
    "Answer_1_specific": 0,
    "Answer_1_avoid_excuse": 1,
    "Answer_2_negative": 0,
    "Answer_2_positive": 1,
    "Answer_3_blame": 0,
    "Answer_3_no_blame": 1,
}


def _rows_from_tasks(tasks):
    """Convert labeled tasks into a list of row dicts (text/category/stage/level)."""
    rows = []
    for task in tasks:
        annotations = task.get("annotations") or []
        if not annotations:
            # Nothing was labeled on this task.
            continue
        # Only the first annotation of each task is used.
        for ann in annotations[0].get("result") or []:
            value = ann.get("value") or {}
            labels = value.get("labels") or []
            if not labels:
                # Results without labels carry no tag to classify.
                continue
            label = labels[0]
            # Ignore any observation that does not carry one of the question
            # or answer stage tags (ex: an observation that only labels the
            # question QID_13).
            if label not in _TAG_TO_LEVEL:
                continue
            rows.append(
                {
                    "text": value["text"],
                    # The category is "Answer" or "Question", taken from the tag.
                    "category": "Answer" if "Answer" in label else "Question",
                    # The stage number is the second "_"-separated field.
                    "stage": int(label.split("_")[1]),
                    "level": _TAG_TO_LEVEL[label],
                }
            )
    return rows


def from_label_studio_to_dataframe(LABEL_STUDIO_URL="",
                                   API_KEY="",
                                   PROJECT_ID="1"):
    '''
    Goals:
        - Load the labeled data from a Label Studio project, keep only the
          observations tagged with a known question/answer stage tag, and
          return them as a pandas DataFrame
    Attributes:
        - LABEL_STUDIO_URL (url as string): the url for the label studio
          project you want to get your data from
        - API_KEY (string): your Label Studio API_KEY
        - PROJECT_ID (string): id of the Label Studio project to read
          (defaults to "1")
    Returns:
        - df (pandas DataFrame): The labeled data as a pandas DataFrame with
          the columns "text", "category", "stage" and "level"
    '''
    ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)
    ls.check_connection()
    pro = project.Project.get_from_id(ls, PROJECT_ID)
    raw_data = pro.get_labeled_tasks()

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = _rows_from_tasks(tqdm(raw_data))
    return pd.DataFrame(rows, columns=["text", "category", "stage", "level"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment