Skip to content

Instantly share code, notes, and snippets.

@gumdropsteve
Created November 27, 2019 20:03
Show Gist options
  • Save gumdropsteve/7323fd4ed393de018b242bf09c60517f to your computer and use it in GitHub Desktop.
Save gumdropsteve/7323fd4ed393de018b242bf09c60517f to your computer and use it in GitHub Desktop.
from blazingsql import BlazingContext
import cudf
# cuDF DataFrame from a CSV stored externally, read directly via its URL
turkey_poll = cudf.read_csv('https://query.data.world/s/ss47hkdmqe5d6353neouv4ourm2ous')

# make columns easier to work with (the query below refers to them as bare
# identifiers): replace spaces w/ underscores and drop question & quotation marks.
# One translation table handles all three substitutions in a single pass.
column_cleanup = str.maketrans(' ', '_', '?"')
new_cols = [col.translate(column_cleanup) for col in turkey_poll.columns]
turkey_poll.columns = new_cols

# start up BlazingSQL
bc = BlazingContext()

# BlazingSQL table from cuDF DataFrame
bc.create_table('thanksgiving_poll', turkey_poll)

# find age, area type, friendsgiving status, work status & turkey status of west coast
# (each CASE turns one survey answer into a 1/0 flag; anything else falls to ELSE 0)
query = '''
SELECT
Age AS age,
CASE
WHEN How_would_you_describe_where_you_live = 'Urban' THEN 1
ELSE 0
END AS city_bool,
CASE
WHEN Have_you_ever_attended_a_Friendsgiving = 'Yes' THEN 1
ELSE 0
END AS friendsgiving_bool,
CASE
WHEN Will_you_employer_make_you_work_on_Black_Friday = 'Yes' THEN 1
ELSE 0
END AS work_friday_bool,
CASE
WHEN What_is_typically_the_main_dish_at_your_Thanksgiving_dinner = 'Turkey' THEN 1
ELSE 0
END AS turkey_bool
FROM
thanksgiving_poll
WHERE
US_Region like 'West%'
'''

# run query (type(result_gdf) == cudf.core.dataframe.DataFrame)
result_gdf = bc.sql(query)

# what're we lookin at?
print(result_gdf.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment