Skip to content

Instantly share code, notes, and snippets.

@databyjp
databyjp / export_weaviate_data.py
Last active March 14, 2024 16:49
Rough script to export data from Weaviate to a series of JSON files
import weaviate
from weaviate.collections import Collection
from weaviate.collections.classes.types import GeoCoordinate
from tqdm import tqdm
from typing import List
import os
import json
from datetime import date, datetime
client = weaviate.connect_to_wcs(
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/sbin</string>
</dict>
<key>ExitTimeOut</key>
yr_ranges = None
if len(sys.argv) < 3:
logger.info(f'Season year arguments not provided')
else:
try:
if 2040 > int(sys.argv[1]) > 1980 and 2040 > int(sys.argv[2]) > 1980:
yr_ranges = [int(sys.argv[1]), int(sys.argv[2])]
else:
logger.error(f'The arguments should season years (e.g. 2021 2020) when data is available')
except:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/sbin</string>
</dict>
<key>KeepAlive</key>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/sbin</string>
</dict>
<key>Label</key>
for gm_id in gm_ids:
shot_blot_dfs = list()
gm_df = playoffs_df[playoffs_df.GAME_ID == gm_id]
game_date = gm_df["realtime_dt"].min().date()
game_date_str = f"{game_date.day}_{game_date.strftime('%b')}_{game_date.strftime('%Y')}"
tm_ids = gm_df.teamId.unique()
tm_abvs = [teams.find_team_name_by_id(tm_id)['abbreviation'] for tm_id in tm_ids]
for tm_id in tm_ids:
tmp_dfs = list()
tm = teams.find_team_name_by_id(tm_id)
# Scatter of playoff shot profiles, faceted by "group" (rows) and
# "shot_type" (columns). Marker size follows the `pts_pct_x` column and
# marker colour follows `shot_ev`, drawn on a fixed 0.7-1.7 colour range.
fig = px.scatter(
    shot_blot_df,
    x="filt_avg",
    y="segment",
    size="pts_pct_x",
    color="shot_ev",
    facet_row="group",
    facet_col="shot_type",
    color_continuous_scale=px.colors.sequential.Blues,
    range_color=[0.7, 1.7],
    template="plotly_white",
    width=1200,
    height=750,
    title=f'{latest_day_str} - Playoff game shot profiles',
    # Column name -> human-readable label used for axes, legend and hover.
    labels=dict(
        filt_avg='Distance from the rim',
        segment='Sample size',
        pts_pct_x='Proportion of points',
        shot_ev='Expected<BR>points<BR>per shot',
    ),
)
import plotly.express as px
# Scatter of playoff shot profiles with one facet row per "group".
# Marker size follows the `shot_freq_x` column and marker colour follows
# the `shot_acc_x` column.
fig = px.scatter(shot_blot_df,
                 title=f'{latest_day_str} - Playoff game shot profiles',
                 x="filt_avg", y="segment", size="shot_freq_x",
                 color="shot_acc_x", color_continuous_scale=px.colors.sequential.Blues,
                 facet_row="group",
                 template="plotly_white", width=1200, height=750,
                 # `labels` is keyed by column name, so it needs entries for the
                 # columns actually passed to size=/color= above; without them the
                 # raw column names are shown.
                 # NOTE(review): the wording for shot_freq_x / shot_acc_x is
                 # inferred from the column names - confirm.
                 labels={'filt_avg': 'Distance from the rim', 'segment': 'Sample size',
                         'shot_freq_x': 'Shot frequency', 'shot_acc_x': 'Shot<BR>accuracy',
                         # Kept from the original; these columns are not plotted by
                         # this call, so the two entries below have no visible effect.
                         'pts_pct_x': 'Proportion of points', 'shot_ev': 'Expected<BR>points<BR>per shot'}
                 )
import pandas as pd
import utils
from scipy.spatial.distance import cosine
from nba_api.stats.static import teams
# Load the shot log and derive the shot-distance frame from it.
shots_df = utils.load_shots_df()
gdf = utils.get_shot_dist_df(shots_df)

# Keep only the rows whose "timeActual" calendar date equals the most
# recent date present in the data.
shot_dates = shots_df["timeActual"].dt.date
day_df = shots_df[shot_dates == shot_dates.max()]