Skip to content

Instantly share code, notes, and snippets.

View khaledadrani's full-sized avatar
💭
Doing stuff

Khaled Adrani khaledadrani

💭
Doing stuff
View GitHub Profile
import spacy
nlp = spacy.load('en_core_web_md')
def annotate_text(doc):
ls = []
for ent in doc.ents:
entry = dict()
entry['text'] = ent.text
@khaledadrani
khaledadrani / filter_tweets_quality.py
Last active December 21, 2021 17:52
Filter_bad_tweets
import re
def filter_tweets(selected):
'''
Filter out any tweet that ends with three dots (indicating it is linking to an external source and thus lacking in information)
And also filter out any tweet that is longer than 200 characters.
'''
filtered = []
url_pattern = "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
for text in selected.text:
@khaledadrani
khaledadrani / praw_hot_posts.py
Last active December 13, 2021 09:15
Starter code for PRAW (the Python Reddit API Wrapper)
# Print the title of each post yielded by subreddit.hot(limit=5).
# NOTE(review): `subreddit` is not defined in this excerpt; presumably it is a
# PRAW Subreddit object created earlier in the script. Confirm.
# NOTE(review): indentation was lost in this excerpt, so it is unclear whether
# the bare print() (which emits an empty line) runs once per post or once
# after the loop. Confirm against the original gist.
for post in subreddit.hot(limit=5):
print(post.title)
print()