Skip to content

Instantly share code, notes, and snippets.

@zackster
Created August 1, 2021 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zackster/5eb062f1fd2ef3ca845ee65769f5e980 to your computer and use it in GitHub Desktop.
Save zackster/5eb062f1fd2ef3ca845ee65769f5e980 to your computer and use it in GitHub Desktop.
linear regression to predict solpunk price based on sales data
import json
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
def list_traits(a):
    """Return the trait names contained in a comma-separated attribute string.

    Each comma-separated entry is trimmed, split on single spaces, and has
    its final token dropped; the remaining tokens are re-joined with single
    spaces to form the trait name. Presumably the dropped token is a
    per-trait count or rarity figure -- TODO confirm against solpunks.json.

    Args:
        a: Attribute string such as ``"Mohawk 441, Big Beard 146"``.

    Returns:
        A list of trait names in input order, e.g. ``["Mohawk", "Big Beard"]``.
    """
    # Trim each entry before splitting: with ", " separators the leading
    # space would otherwise survive as part of the name (" Big Beard"), making
    # the same trait look different depending on its position in the string.
    return [" ".join(entry.strip().split(' ')[:-1]) for entry in a.split(',')]
# Load list of all punks
with open('solpunks.json', 'r', encoding='utf-8') as pr:
    punk_json = json.load(pr)

# punks maps punk id -> feature dict; all_traits collects every trait name
# seen on any punk so each one can become a boolean column below.
punks = {}
all_traits = set()
for punk in punk_json['punks']:
    # Skip SatoshiStreetBets
    if 'id' not in punk:
        continue
    # The literal string 'NULL' marks a punk without attributes; give it a
    # placeholder trait so it still gets a value in the trait columns.
    if punk['attributes'] == 'NULL':
        punk_traits = ['NoTraits']
    else:
        punk_traits = list_traits(punk['attributes'])
    punks[punk['id']] = {
        "ranking": punk["ranking"],
        "skin": punk["skin"],
        "type": punk["type"],
        "traits": punk_traits,
    }
    all_traits.update(punk_traits)

# Replace each punk's trait list with one boolean entry per known trait:
# False everywhere first, then True for the traits that punk actually has.
for punk in punks.values():
    my_traits = punk.pop('traits')
    for trait in all_traits:
        punk[trait] = False
    for trait in my_traits:
        punk[trait] = True
# Load pricing data
with open('all_solpunks_sold.json', encoding='utf-8') as sales_data:
    sales = json.load(sales_data)
# Process the list back to front: a punk sold more than once keeps the price
# of the entry that appears EARLIEST in the file, because that entry is
# applied last. Presumably the file is ordered newest-first -- TODO confirm.
sales.reverse()
for sale in sales:
    punk_id = sale['name']
    # Skip SatoshiStreetBets
    try:
        # The id is the text after '#', left-padded with zeros to 4 characters
        # (assumed to match the id format used as keys in punks).
        punk_id = punk_id.split('#')[1].zfill(4)
    except (AttributeError, IndexError):
        # No '#' in the name, or the name is not a string: not a punk sale.
        continue
    punks[punk_id]['price'] = sale['price']
# One row per punk; 'price' is NaN for punks that never appeared in the sales.
df = pd.DataFrame(punks.values())
non_dummy_cols = ['ranking', 'price']
X = df
# Every column except ranking/price is dummy-encoded. Build the list in
# column order (not via a set difference) so the resulting feature layout is
# the same on every run.
dummy_cols = [col for col in X.columns if col not in non_dummy_cols]
all_X = pd.get_dummies(X, columns=dummy_cols, drop_first=True)
# Only punks with a recorded sale price can be used for training/testing.
priced = all_X[all_X.price.notnull()]
Y = priced['price']
# We don't want to train on price, we want to predict it
# (drop returns a new frame, so the filtered slice is never mutated in place)
X = priced.drop(columns='price')
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = .20, random_state = 40)
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)
predicted = regr.predict(X_test)
# Drop into the debugger so the fitted model and frames can be inspected
# interactively.
import pdb
pdb.set_trace()
# To get prediction for punk id #9203, use
# regr.predict(all_X.drop(columns='price'))[9203]
# (the price column must be removed first: the model was not trained on it.
#  The positional index assumes row order matches punk number -- TODO confirm.)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment