Skip to content

Instantly share code, notes, and snippets.

Octoparse octoparse

Block or report user

Report or block octoparse

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@octoparse
octoparse / nfl1.py
Created Nov 11, 2019
Scraping fantasy football projections
View nfl1.py
from bs4 import BeautifulSoup
import re
import requests
def get_html_data(url, timeout=30):
    """Download *url* and return its body parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block forever on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the raw response bytes using
        the ``html5lib`` parser.

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            timeout expires, or the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing the error page as if
    # it were the requested data.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html5lib")
@octoparse
octoparse / gender_analysis_on_movies.py
Last active May 9, 2019
Data Science: What is the near future of Superheroines?
View gender_analysis_on_movies.py
import collections
import re
def get_first_name(aString):
if not aString:
return aString
ss = aString.replace('*', '').split(' ') # ['Leonard', 'Nimoy*Chris', 'PineZachary', 'QuintoZoe', 'SaldanaKarl']
name_list = [] # result returned for this function
for name in ss:
View c5a44b9288ab-code.py
import re
import json
# save the positive words into a list called p_list
with open('positive.txt') as f:
    p_txt = f.read()
# Drop punctuation, digits, possessive "'s" and stray apostrophes. The raw
# string stops "\." and "\d" from being read as (invalid) string escapes.
p_txt = re.sub(r'[,\.()":;!@#$%^&*\d]|\'s|\'', '', p_txt)
# split() with no argument breaks on any run of whitespace (spaces, newlines,
# tabs) and never yields empty strings; the former replace(' ', ' ') was a
# no-op, so consecutive separators left '' entries in the list.
p_list = p_txt.lower().split()
# test if cool is in the list
# A single parenthesised argument prints the same text on Python 2 and 3.
print('cool is in the postive list:  %s' % ('cool' in p_list))
View c5a44b9288ab-part5.py
# count if it is a positive word
if word in p_list:
if word in word_count_positive.keys():
word_count_positive[word] += 1
else:
word_count_positive[word] = 1
# else see if it is a negative word
elif word in n_list:
if word in word_count_negative.keys():
word_count_negative[word] += 1
View c5a44b9288ab-part4.py
# count all words frequency
for word in word_list:
    # dict.get supplies 0 for a word not seen yet, so no separate membership
    # test is needed. Under Python 2 ``word in d.keys()`` builds a list and
    # scans it linearly on every iteration.
    word_count_dict[word] = word_count_dict.get(word, 0) + 1
View c5a44b9288ab-part3.py
# Three separate word -> count tables, each starting out empty.
word_count_dict, word_count_positive, word_count_negative = {}, {}, {}
View c5a44b9288ab-part2.py
# Read the whole text from the file object f opened by the surrounding code.
txt = f.read()
# Drop punctuation, digits, possessive "'s" and stray apostrophes. The raw
# string stops "\." and "\d" from being read as (invalid) string escapes.
txt = re.sub(r'[,\.()":;!@#$%^&*\d]|\'s|\'', '', txt)
# split() with no argument breaks on any run of whitespace and never yields
# empty strings; the former replace(' ', ' ') was a no-op, so consecutive
# separators produced '' entries in word_list.
word_list = txt.lower().split()
View c5a44b9288ab-part1.py
import re
import json
# Build p_list, the lower-cased positive words read from positive.txt.
with open('positive.txt') as f:
    p_txt = f.read()
# Drop punctuation, digits, possessive "'s" and stray apostrophes. The raw
# string stops "\." and "\d" from being read as (invalid) string escapes.
p_txt = re.sub(r'[,\.()":;!@#$%^&*\d]|\'s|\'', '', p_txt)
# split() with no argument breaks on any run of whitespace (spaces, newlines,
# tabs) and never yields empty strings; the former replace(' ', ' ') was a
# no-op, so consecutive separators left '' entries in the list.
p_list = p_txt.lower().split()
# test if cool is in the list
# A single parenthesised argument prints the same text on Python 2 and 3.
print('cool is in the postive list:  %s' % ('cool' in p_list))
# Load the raw negative-word text; its clean-up is not part of this excerpt.
with open('negative.txt') as f:
    n_txt = f.read()
You can’t perform that action at this time.