s = [1, 2, 3, 4, 5]

def foo(l, bar):
    return list(map(bar, l))

print(foo(s, lambda x: x * 2))  # [2, 4, 6, 8, 10]
# What is the sentiment surrounding each character?
def sentiment_descriptor_to_val(descriptor):
    """
    - Parameters: descriptor ("negative", "neutral", or "positive")
    - Returns: -1 for "negative", 0 for "neutral", 1 for "positive"
    """
    if descriptor == "negative":
        return -1
    elif descriptor == "neutral":
        return 0
    else:
        return 1
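
# Illustrative follow-up (sketch): assuming `characters` is the DataFrame
# returned by person_df (defined in a later snippet), with "text" and
# "sentiment" columns, the mapping above gives numeric values that can be
# averaged per character.
characters["sentiment_val"] = characters["sentiment"].map(sentiment_descriptor_to_val)
characters.groupby("text")["sentiment_val"].mean().sort_values()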
# Which characters appear most frequently?
def frequency_count(df, col, limit=10):
    return df[col].value_counts().head(limit)

frequency_count(characters, "text")
# Ahab        474
# Stubb       224
# Queequeg    184
# Starbuck    140
def num_unique_items(df, col):
    return len(df[col].unique())

num_unique_items(characters, "text")
# 699
import pandas as pd

# select person entities
def select_person_entities(doc):
    return [ent for ent in doc.entities if ent.type == "PERSON"]

def person_df(doc):
    """
    - Parameters: doc (a Stanza Document object)
    - Returns: A Pandas DataFrame with one row for each entity in doc
      that has a "PERSON" type, and columns text, type, start_char,
      and the sentiment of the sentence in which the entity appears.
    """
    # each Stanza entity Span keeps a reference to its sentence (ent.sent)
    rows = [{"text": ent.text, "type": ent.type, "start_char": ent.start_char,
             "sentiment": sentiment_descriptor(ent.sent)}
            for ent in select_person_entities(doc)]
    return pd.DataFrame(rows)
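
# Usage sketch: build the `characters` DataFrame used in the snippets above
# (assumes `moby_dick` is the processed Document from the pipeline snippet below).
characters = person_df(moby_dick)
characters.head()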
# load the full text and put it through the pipeline
import stanza

# assumes an English pipeline with the NER and sentiment processors loaded
nlp = stanza.Pipeline("en", processors="tokenize,pos,lemma,ner,sentiment")

def load_text_doc(file_path):
    with open(file_path) as f:
        txt = f.read()
    return txt

moby_path = "moby_dick.txt"
moby_dick_text = load_text_doc(moby_path)
moby_dick = nlp(moby_dick_text)
def sentiment_descriptor(sentence):
    """
    - Parameters: sentence (a Stanza Sentence object)
    - Returns: A string descriptor for the sentiment value of sentence.
    """
    sentiment_value = sentence.sentiment
    if sentiment_value == 0:
        return "negative"
    elif sentiment_value == 1:
        return "neutral"
    else:
        return "positive"
def print_entity_info(entity):
    print(f"Text:\t{entity.text}")
    print(f"Type:\t{entity.type}")
    print(f"Start:\t{entity.start_char}")
    print(f"End:\t{entity.end_char}")

# moby_p1 is assumed to be a Stanza Document for the novel's opening paragraph
print_entity_info(moby_p1.entities[0])
# Text:  Ishmael
# Type:  PERSON
def print_word_info(word):
    print(f"Text:\t{word.text}")
    print(f"Lemma:\t{word.lemma}")
    print(f"UPOS:\t{word.upos}")
    print(f"XPOS:\t{word.xpos}")

print_word_info(moby_p1.sentences[3].words[4])
# Text:  growing
# Lemma: grow
def print_token_info(token):
    print(f"Text:\t{token.text}")
    print(f"Start:\t{token.start_char}")
    print(f"End:\t{token.end_char}")

print_token_info(moby_p1.sentences[0].tokens[2])
# Text:  Ishmael
# Start: 8
# End:   15