Skip to content

Instantly share code, notes, and snippets.

@MrN00b0t
Created May 7, 2020 10:24
Show Gist options
  • Save MrN00b0t/281bc740d0f017529c24cb33e8dcf2ac to your computer and use it in GitHub Desktop.
Save MrN00b0t/281bc740d0f017529c24cb33e8dcf2ac to your computer and use it in GitHub Desktop.
Codecademy: This is Jeopardy!
import pandas as pd
# Show full cell contents when printing. None means "no limit"; the legacy -1
# sentinel was deprecated in pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)
# Load csv into Data Frame
jeopardy = pd.read_csv('jeopardy.csv')
# Investigate DataFrame structure
print(jeopardy.head())
# head() does not show the full DataFrame; list the raw column names instead
for col in jeopardy.columns:
    print(col)
# Column names are not compatible as variable names, so normalise them all in
# one pass: trim surrounding whitespace, lower-case, spaces -> underscores.
jeopardy = jeopardy.rename(
    columns=lambda name: name.strip().lower().replace(' ', '_')
)
# Get a feel for the data held in each (renamed) column
for colnew in jeopardy.columns:
    print(jeopardy[colnew])
# New column names are show_number(int), air_date(yyyy-mm-dd), round(str), category(str), value(str), question(str) and answer(str)
# Searching a dataset for a list of words; convert haystack and needle to lower case before performing search
def find_question(frame, wordlist):
    """Return the rows of *frame* whose 'question' text contains every word.

    Matching is a case-insensitive substring test: each entry of *wordlist*
    must occur somewhere in the question.

    Args:
        frame: DataFrame with a 'question' column.
        wordlist: Iterable of search strings. It is consumed once up front,
            so a one-shot iterable (e.g. a generator) works too.

    Returns:
        The subset of *frame* (original index preserved) whose question
        contains all of the words.
    """
    # Lower-case the needles once instead of once per row.
    needles = [word.lower() for word in wordlist]

    def contains_all(text):
        # Missing questions (NaN/None) have no .lower(); such a cell contains
        # no words, so it only matches the vacuous empty word list.
        if not isinstance(text, str):
            return not needles
        haystack = text.lower()
        return all(needle in haystack for needle in needles)

    # astype(bool) keeps the mask boolean even when the frame is empty.
    mask = frame['question'].apply(contains_all).astype(bool)
    return frame.loc[mask]
# Test Function
print(find_question(jeopardy, ['Chief', 'Justice']))


def _value_to_float(value):
    """Convert a raw prize cell such as '$1,200' to a float.

    Cells without a prize map to 0.0. Such a cell may arrive either as the
    literal string 'None' or as a missing value (NaN), because recent pandas
    releases treat 'None' as an NA marker when reading a CSV.
    """
    if pd.isna(value) or value == 'None':
        return 0.0
    # str() tolerates a column that was already parsed as numeric.
    return float(str(value).replace('$', '').replace(',', ''))


# Create column float_value containing formatted and floated values,
# replacing Final Jeopardy values ('None' / missing) with 0.0
jeopardy['float_value'] = jeopardy['value'].apply(_value_to_float)
# Test the conversion
print(jeopardy['float_value'])
# Average the prize value over every question that mentions the search word
difficulty = find_question(jeopardy, ['King'])
mean_value = difficulty['float_value'].mean()
print(mean_value)
# Tally how often each distinct answer occurs among those questions
answer_counts = difficulty['answer'].value_counts()
print(answer_counts)
# Count the questions containing the word "computer" in each decade.
# Add a column holding the leading four characters of air_date (the year).
jeopardy['question_year'] = jeopardy['air_date'].map(lambda air_date: air_date[:4])
# Select the matching questions, then count them per question_year
computer = find_question(jeopardy, ['Computer'])
computer_by_year = computer.groupby('question_year')['show_number'].count().reset_index()
# The years are strings, so the decade bounds below compare lexicographically
year_labels = computer_by_year['question_year']
# Rows where question_year is in the 90s
computer_90s = computer_by_year[(year_labels > '1989') & (year_labels < '2000')]
# Rows where question_year is in the 2000s
computer_2000s = computer_by_year[(year_labels > '1999') & (year_labels < '2010')]
# Total the per-year counts for each decade and print to terminal
total_90s = computer_90s['show_number'].sum()
total_2000s = computer_2000s['show_number'].sum()
print(
    'The number of questions featuring the word "computer" in the 1990s was {} '
    'whereas the number of questions containing the word "computer" in the 2000s was {}'
    .format(total_90s, total_2000s)
)
# Count how many questions each category contributed to each round
category_round = jeopardy.groupby(['category', 'round']).show_number.count().reset_index()
# Pivot to one row per category and one column per round for readability
category_round_pivot = category_round.pivot(columns='round', index='category', values='show_number').reset_index()
# Rename the round columns by label rather than by position: a positional
# assignment raises a length mismatch as soon as the data contains any round
# beyond these three. Columns for other rounds keep their original label.
# NOTE(review): assumes the round labels are exactly 'Double Jeopardy!',
# 'Final Jeopardy!' and 'Jeopardy!' -- confirm against the data.
category_round_pivot = category_round_pivot.rename(
    columns={
        'Double Jeopardy!': 'double',
        'Final Jeopardy!': 'final',
        'Jeopardy!': 'single',
    }
)
# Drop the leftover 'round' axis label so the header prints as plain columns
category_round_pivot.columns.name = None
# Display resulting pivot table
print(category_round_pivot)
# To find data on a specific category
literature = category_round_pivot[category_round_pivot['category'] == 'LITERATURE']
print(literature)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment