Created
January 22, 2020 22:23
-
-
Save samdobson/ae86d6e0f0b465485aa214644f0811cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import math

import numpy as np
import pandas as pd
# Build a per-guess table from a line-delimited JSON dump of number-guessing
# games and write it to 'guesses.csv'. For every guess the script records the
# search bounds that were in effect before the guess, how far the guess was
# from the midpoint of those bounds ("risk"), the attempt number and whether
# the guess fell outside the current bounds.

# Read data.
# The replace chain unwraps typed values of the form {"n":"..."} / {"s":"..."}
# into plain JSON strings so that every field is a scalar.
with open('datafile.json') as f:
    s = f.read().replace('{"n":"', '"').replace('{"s":"', '"').replace('"}', '"')
# Wrap the text in StringIO: passing a literal JSON string to read_json is
# deprecated in recent pandas releases.
df = pd.read_json(io.StringIO(s), lines=True)

# Re-shape dataframe with one line per guess.
# 'valid_guesses' is a comma-separated string; split it and give each guess
# its own row. The index is reset so that every row has a unique label.
lst_col = 'valid_guesses'
df2 = (
    df.assign(**{lst_col: df[lst_col].str.split(',')})
    .explode(lst_col)
    .loc[:, ['id', 'valid_guesses', 'actualNumber']]
    .reset_index(drop=True)
)
test = df2

# Make fields numeric.
# Unparseable values become NaN. The cast to float keeps the columns floating
# point even when every value is an integer, so the CSV formatting does not
# depend on the data.
for col in ['valid_guesses', 'actualNumber']:
    test[col] = pd.to_numeric(test[col], errors='coerce').astype(float)

# Add guess context.
# Walk the guesses in file order, carrying the current search bounds and the
# attempt counter from one row to the next.
# NOTE(review): the state is reset only after a correct guess, not when 'id'
# changes - confirm that every game in the data ends with a correct guess.
attemptNum = 1
lower = 1
upper = 100
lower_bounds = []
upper_bounds = []
attempt_nums = []
out_of_bounds = []
for guess, actual in zip(test['valid_guesses'], test['actualNumber']):
    # Record the state as it was *before* this guess.
    lower_bounds.append(lower)
    upper_bounds.append(upper)
    attempt_nums.append(attemptNum)
    attemptNum += 1
    # Don't update bounds for out of bounds guesses
    if (guess < lower) or (guess > upper):
        out_of_bounds.append(1)
        continue
    out_of_bounds.append(0)
    if actual < guess:
        upper = guess - 1
    elif actual > guess:
        lower = guess + 1
    elif guess == actual:
        # Correct guess: the next row starts a fresh game.
        upper = 100
        lower = 1
        attemptNum = 1
    # A NaN guess or target matches none of the branches above and leaves
    # the bounds untouched.

# Define new fields
test['lowerBound'] = np.asarray(lower_bounds, dtype=float)
test['upperBound'] = np.asarray(upper_bounds, dtype=float)
test['midPoint'] = (test['upperBound'] + test['lowerBound']) / 2
# Whole-number distance from the midpoint, scaled by 50. np.floor is used
# because it propagates NaN, whereas math.floor raises on a NaN guess.
test['risk'] = np.floor((test['valid_guesses'] - test['midPoint']).abs()) / 50
test['attemptNum'] = np.asarray(attempt_nums, dtype=float)
test['outOfBounds'] = np.asarray(out_of_bounds, dtype=int)
test['perfectGuess'] = np.where(test['risk'] == 0, '1', '0')
# round() returns a new frame, so the result has to be kept.
test = test.round(2)

# Write guesses to file.
# NOTE(review): 'output_folder' is not defined in this file; it must be
# provided by the environment the script runs in - confirm.
test.to_csv(
    output_folder + 'guesses.csv',
    columns=['id', 'actualNumber', 'attemptNum', 'valid_guesses', 'risk', 'perfectGuess', 'outOfBounds', 'lowerBound', 'upperBound'],
    header=['gameId', 'targetNum', 'attemptNum', 'guess', 'risk', 'perfectGuess', 'outOfBounds', 'lowerBound', 'upperBound'],
    index=False,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment