Skip to content

Instantly share code, notes, and snippets.

View dreyco676's full-sized avatar

John Hogue dreyco676

View GitHub Profile
@dreyco676
dreyco676 / pyspark_list_to_column
Created October 9, 2018 04:22
PySpark List Column to Boolean Columns for each value
from pyspark.sql.functions import split, explode, lit, coalesce, first
# Break the 'ROOF' string into an array column, splitting on ', '.
df = df.withColumn('roof_list', split(df.ROOF, ', '))
# Explode the array so every element of 'roof_list' gets its own row, then
# attach a literal 1 to each row as 'constant_val' (presumably the value a
# later aggregation works on -- that step is not visible here).
ex_df = (
    df.withColumn('ex_roof_list', explode(df.roof_list))
    .withColumn('constant_val', lit(1))
)
@dreyco676
dreyco676 / getWeFollow.py
Created May 18, 2015 16:12
Python We Follow Scraper
import requests
from bs4 import BeautifulSoup
# Get top influencers from WeFollow
def get_influencers(topic, min_rank, max_rank):
url = 'http://wefollow.com/interest/'
req_url = url + topic + '/' + str(min_rank) + '-' + str(max_rank)
response = requests.get(req_url)
soup = BeautifulSoup(response.content)