Skip to content

Instantly share code, notes, and snippets.

View octoparse's full-sized avatar

Octoparse octoparse

View GitHub Profile
@octoparse
octoparse / nfl1.py
Created November 11, 2019 04:09
Scraping fantasy football projections
from bs4 import BeautifulSoup
import re
import requests
def get_html_data(url, timeout=30):
    """Download *url* and return its body parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests.get`` can block forever on a stalled
            connection, so a finite default is used; pass ``None`` to restore
            the unbounded wait.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the ``html5lib`` parser.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    response = requests.get(url, timeout=timeout)
    # The HTTP status is deliberately not checked: error pages are parsed
    # and returned exactly like successful ones.
    return BeautifulSoup(response.content, "html5lib")
@octoparse
octoparse / gender_analysis_on_movies.py
Last active May 9, 2019 01:24
Data Science: What is the near future of Superheroines?
import collections
import re
def get_first_name(aString):
if not aString:
return aString
ss = aString.replace('*', '').split(' ') # ['Leonard', 'Nimoy*Chris', 'PineZachary', 'QuintoZoe', 'SaldanaKarl']
name_list = [] # result returned for this function
for name in ss:
import re
import json
# Load the positive-word lexicon into a list called p_list.
with open('positive.txt') as f:
    p_txt = f.read()

# Strip punctuation, digits, possessive 's and stray apostrophes.  The
# pattern is a raw string so that \. and \d reach the regex engine as
# written instead of relying on Python passing unknown escapes through
# (which warns on current interpreters).  The pattern text is unchanged.
p_txt = re.sub(r"""[,\.()":;!@#$%^&*\d]|'s|'""", '', p_txt)

# NOTE(review): replace(' ', ' ') is a no-op as written; presumably it once
# collapsed double spaces -- confirm against the original script.
p_list = p_txt.replace('\n', ' ').replace(' ', ' ').lower().split(' ')

# Sanity check: is "cool" in the list?  A single pre-formatted argument in
# parentheses is valid on Python 3 and prints the same text on Python 2
# (the two spaces reproduce the old print statement's separator).
print('cool is in the postive list:  %s' % ('cool' in p_list))
# count if it is a positive word
if word in p_list:
if word in word_count_positive.keys():
word_count_positive[word] += 1
else:
word_count_positive[word] = 1
# else see if it is a negative word
elif word in n_list:
if word in word_count_negative.keys():
word_count_negative[word] += 1
# Tally how often every word occurs.  dict.get supplies 0 for a word that
# has not been seen yet, replacing the `in word_count_dict.keys()` test
# (a linear scan of a freshly built list on Python 2) and its if/else with
# one hashed lookup per word.  The resulting counts are identical.
for word in word_list:
    word_count_dict[word] = word_count_dict.get(word, 0) + 1
# Start from three independent, empty tallies (one dict object each, so
# updating one never affects the others).
word_count_negative = dict()
word_count_positive = dict()
word_count_dict = dict()
# NOTE(review): `f` is not opened in this snippet; it is assumed to be an
# already-open text file -- confirm against the full script.
txt = f.read()

# Strip punctuation, digits, possessive 's and stray apostrophes.  Raw
# string so that \. and \d are regex escapes rather than (invalid) string
# escapes; the pattern text itself is unchanged.
txt = re.sub(r"""[,\.()":;!@#$%^&*\d]|'s|'""", '', txt)

# Lower-case and split on single spaces; runs of spaces therefore yield
# empty strings in word_list.
# NOTE(review): replace(' ', ' ') is a no-op as written; presumably it once
# collapsed double spaces -- confirm against the original script.
word_list = txt.replace('\n', ' ').replace(' ', ' ').lower().split(' ')
import re
import json
# Load the positive-word lexicon.
with open('positive.txt') as f:
    p_txt = f.read()

# Strip punctuation, digits, possessive 's and stray apostrophes.  Raw
# string so that \. and \d are regex escapes rather than (invalid) string
# escapes; the pattern text itself is unchanged.
p_txt = re.sub(r"""[,\.()":;!@#$%^&*\d]|'s|'""", '', p_txt)

# NOTE(review): replace(' ', ' ') is a no-op as written; presumably it once
# collapsed double spaces -- confirm against the original script.
p_list = p_txt.replace('\n', ' ').replace(' ', ' ').lower().split(' ')

# Sanity check: is "cool" in the list?  A single pre-formatted argument in
# parentheses is valid on Python 3 and prints the same text on Python 2
# (the two spaces reproduce the old print statement's separator).
print('cool is in the postive list:  %s' % ('cool' in p_list))

# Read the raw negative-word lexicon; it is only loaded here, not yet
# cleaned or split.
with open('negative.txt') as f:
    n_txt = f.read()