Skip to content

Instantly share code, notes, and snippets.

View maxwellbade's full-sized avatar

Max Bade maxwellbade

View GitHub Profile
def timeline_plot(df,x_axis,y_axis,index_col,title,height,width,template):
    """Write an alternating +1/-1 dummy column into ``df`` for the y-axis.

    Rows whose ``index_col`` value is even receive ``1`` in ``df[y_axis]``;
    every other row receives ``-1``. ``df`` is modified in place and nothing
    is returned by the code visible here.

    NOTE(review): ``x_axis``, ``title``, ``height``, ``width`` and
    ``template`` are not used in this excerpt -- presumably they feed
    plotting code that is missing from it; confirm against the full source.

    Args:
        df: DataFrame containing the column named by ``index_col``.
        x_axis: Unused in the visible code.
        y_axis: Name of the column to create or overwrite with +1/-1.
        index_col: Name of the numeric column whose parity is tested.
        title: Unused in the visible code.
        height: Unused in the visible code.
        width: Unused in the visible code.
        template: Unused in the visible code.
    """
    # dummy column for yaxis
    # Vectorised parity test instead of a row-wise ``df.apply(..., axis=1)``:
    # on an empty frame a row-wise apply hands back a DataFrame rather than
    # a Series, which cannot be assigned reliably to a single column.
    is_even = df[index_col] % 2 == 0
    # True -> 1, False -> -1 (the same mapping the per-row helper produced).
    df[y_axis] = is_even.astype(int) * 2 - 1
def experiment_forecast(df,date_col,experiment_name
,metric_name,variant_name
,steps,title):
import statsmodels.api as sm
#get dates and metric
df = df
df_dau = df[df['experiment_name'] == experiment_name]
df_dau = df_dau[df_dau['metric_name'] == metric_name]
start_time = datetime.now()
print('Start Time: ', start_time)
import bs4
import requests
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import time
import pandas as pd
import requests
import re
import json
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
city = 'austin/' #*****change this city to what you want!!!!*****
import requests
import re
import json
import time
import warnings
warnings.filterwarnings('ignore')
city = 'austin/' #*****change this city to what you want!!!!*****
import requests
import re
import json
import warnings
warnings.filterwarnings('ignore')
url='https://www.zillow.com/homes/for_sale/2_p/?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A2%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-97.88981437683105%2C%22east%22%3A-97.78407096862793%2C%22south%22%3A30.136145838104586%2C%22north%22%3A30.208863801102932%7D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22days%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A13%7D'
data = json.loads(re.search(r'!--(\{"queryState".*?)-->', r.text).group(1))
#single attributes
new_rules['softblocked'] = new_rules['softblocked'].astype(float)
new_rules = new_rules.sort_values(by='softblocked',ascending=True)
fig = px.scatter(new_rules
,x='softblocked_rate'
,y='softblocked'
,color='groupcols'
,log_y=True
,template='plotly_dark'
start_time = datetime.now()
print('Start Time: ', start_time)
cols = [
'avg_posts_per_day'
,'max_posts_per_day_bucket'
,'days_since_registered_bucket'
,'shortest_time_bt_posts_seconds_bucket'
,'count_users_per_ip_bucket'
,'count_dmas_bucket'
#do not insert columns with null values
def fraud_rate(df, agg, cols=None, threshold=None, limit=None, days=None, minrate=None, maxrate=None):
if isinstance(cols, str):
groupcols = [cols]
elif cols is None:
groupcols = []
else:
try:
groupcols = list(cols)
except:
from fuzzywuzzy import process, fuzz
def fuzzy(col, risky_words, score, threshold=85):
    """Return the best fuzzy match for ``col`` among ``risky_words``.

    Args:
        col: The string to look up.
        risky_words: Candidate strings handed to ``process.extractOne``.
        score: Scorer callable forwarded to ``process.extractOne`` as
            ``scorer=``.
        threshold: Minimum match score to accept. Defaults to 85, the
            cut-off that was previously hard-coded.

    Returns:
        A ``(best_match, match_score)`` tuple when the best match scores at
        least ``threshold``; otherwise the string ``'no_high_matches'``.
    """
    best = process.extractOne(col, risky_words, scorer=score)
    # extractOne yields None when there is nothing to match against (e.g. an
    # empty candidate list); treat that as "no match" instead of crashing on
    # the tuple unpacking below.
    if best is None:
        return 'no_high_matches'
    # Separate names so the scorer argument ``score`` is not overwritten by
    # the numeric score of the match.
    best_match, match_score = best
    if match_score < threshold:
        return 'no_high_matches'
    return best_match, match_score
start_time = datetime.now()