Skip to content

Instantly share code, notes, and snippets.

View oaguy1's full-sized avatar

Lily Hughes-Robinson oaguy1

View GitHub Profile
<script src="https://code.jquery.com/jquery-1.11.3.min.js"></script>
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css" rel="stylesheet">
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/js/bootstrap.min.js"></script>
<link rel="stylesheet" href="//maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">
<style>
a:hover {
text-decoration: none;
}
h1 {
@oaguy1
oaguy1 / corpus_eda.py
Last active June 5, 2020 19:16
Simple Reddit EDA for NLP
import pandas as pd
import praw
from praw.models import Comment
# Reddit API credentials: placeholder strings to replace with your own values.
client_id = "your actual client id"
client_secret = "your client secret"
# Placeholder for the user-agent string handed to PRAW below.
user_agent = "your user agent"
# Build the PRAW client from the three values defined above.
reddit = praw.Reddit(client_id=client_id, client_secret=client_secret, user_agent=user_agent)
@oaguy1
oaguy1 / remove_stop_words.py
Last active June 2, 2020 22:31
Removing stop words from raw natural language text
import spacy
nlp = spacy.load('en_core_web_sm')
# comments is an array of strings we generated earlier
parsed_bodies = [nlp(comm) for comm in comments]
cleaned = []
for doc in parsed_bodies:
current = []
for token in doc:
import re

# Example comment containing two Reddit user mentions.
raw_comment = "I hate u/oaguy1 and u/example123"

# Matches a Reddit user mention written as "u/name" or "/u/name", where name
# is 3-20 word characters or hyphens.  The prefix is (?:/|\b) rather than
# \b/? because \b never matches between whitespace (or the start of the
# string) and "/", which left the leading slash of " /u/name" behind in the
# masked text.
reddit_rx = re.compile(r"(?:/|\b)u/[\w-]{3,20}\b")

# Replace every mention with a fixed placeholder.
# Result: "I hate USERNAME and USERNAME"
masked_comment = reddit_rx.sub("USERNAME", raw_comment)