Skip to content

Instantly share code, notes, and snippets.

View fjcasti1's full-sized avatar

Kiko Castillo fjcasti1

View GitHub Profile
@fjcasti1
fjcasti1 / create_none_values_in_column_set.py
Last active May 4, 2023 19:51
Create some None values in a set of columns
# CREATE NULL VALUES IN SOME ROWS FOR 1 OR MORE COLUMNS
# `df` is a DataFrame defined outside this snippet.
import random

N_rows = 10  # How many rows to overwrite with None.

# Distinct random row *positions* in [0, len(df)). random.sample raises
# ValueError when N_rows is larger than len(df).
index = random.sample(range(len(df)), N_rows)

# Columns to add Nones to. The trailing `...` is a placeholder: replace it
# with real column names before running.
col_list = ['col-1', 'col-2', ...]

# df.loc selects by index *label*, not by position, so the sampled positions
# are translated into the frame's own labels first. Passing the raw positions
# only works when the index happens to be 0..len(df)-1.
# NOTE: if the index holds duplicate labels, .loc matches every row that
# shares a sampled label.
df.loc[df.index[index], col_list] = None
@fjcasti1
fjcasti1 / create_none_col.py
Created May 4, 2023 19:15
Create one or more columns full of None values
# CREATE 1 OR MORE COLUMNS FULL OF NONES
# `df` is defined outside this snippet (presumably a pandas DataFrame).
# The trailing `...` in the list is a literal Ellipsis used as a placeholder:
# replace it with the real column names before running.
col_list = ['col-1','col-2',...]
# Assign None to every column named in col_list in a single statement.
df[col_list]=None
@fjcasti1
fjcasti1 / CV_Classification_send_data.py
Created November 8, 2022 01:51
CV_Classification send data
# Three index-aligned lists: a display name, an Environments member, and a
# DataFrame for each environment. train_df / val_df / prod_df, Environments,
# arize_client, model_id and model_version are defined outside this snippet.
env_names = ['training', 'validation', 'production']
environments = [Environments.TRAINING, Environments.VALIDATION, Environments.PRODUCTION]
dfs = [train_df, val_df, prod_df]
# Logging DataFrames
# zip walks the three lists in lockstep, one (name, environment, frame)
# triple per iteration.
for env_name, env, df in zip(env_names, environments, dfs):
# NOTE(review): the arize_client.log(...) call below is cut off in this view
# (remaining arguments and the closing parenthesis are not shown) and the loop
# body has lost its indentation; restore both before running.
response = arize_client.log(
dataframe=df,
model_id=model_id,
model_version=model_version,
@fjcasti1
fjcasti1 / CV_Classification_define_schema.py
Created November 8, 2022 01:49
CV_Classification define schema
# Starts out empty; nothing in this snippet adds to it.
features = list()

# Column names collected under `arize_columns`, in their original order.
arize_columns = [
    "prediction_id",
    "prediction_ts",
    "label",
    "pred_label",
    "image_vector",
    "url",
]
@fjcasti1
fjcasti1 / CV_Classification_setup_client.py
Created November 8, 2022 01:47
CV_Classification setup client
# Arize credentials. The literal values below are placeholders and must be
# replaced with real keys before this snippet can succeed.
SPACE_KEY = "SPACE_KEY"
API_KEY = "API_KEY"

# Fail fast: reject the untouched placeholders *before* a Client is built
# from them, so no client object ever exists with bogus credentials.
if SPACE_KEY == "SPACE_KEY" or API_KEY == "API_KEY":
    raise ValueError("❌ NEED TO CHANGE SPACE AND/OR API_KEY")

# Client and ModelTypes come from imports outside this snippet.
arize_client = Client(space_key=SPACE_KEY, api_key=API_KEY)
model_id = "CV-demo-fashion-mnist-quality-drift"
model_version = "1.0"
model_type = ModelTypes.SCORE_CATEGORICAL
print("✅ Import and Setup Arize Client Done! Now we can start using Arize!")
@fjcasti1
fjcasti1 / CV_Classification_map_labels_to_class_names.py
Created November 8, 2022 01:41
CV_Classification map labels to class names
# Replace the values in the 'label' and 'pred_label' columns of each frame
# with id2label[value]. train_df, val_df, prod_df and id2label are defined
# outside this snippet. Indexing id2label directly (instead of passing it to
# .map) means a value that is missing from id2label raises rather than
# silently becoming NaN.
for frame in (train_df, val_df, prod_df):
    for column in ('label', 'pred_label'):
        frame[column] = frame[column].map(lambda label: id2label[label])
@fjcasti1
fjcasti1 / CV_Clasification_postprocessing.py
Created November 8, 2022 01:37
CV_Classification postprocessing
# Run the model on one batch and derive predicted labels and embeddings.
# NOTE(review): this function is cut off after the `embeddings` line in this
# view (no return statement is visible) and its body has lost its
# indentation; restore both before running.
# feature_extractor, model and device are defined outside this snippet.
def postprocess(batch):
# Convert every image in batch["image"] to RGB, run the feature extractor
# asking for PyTorch tensors, and move the result to `device`.
inputs = feature_extractor([x.convert("RGB") for x in batch["image"]], return_tensors="pt").to(device)
# Inference only: gradient tracking is disabled for the forward pass.
with torch.no_grad():
outputs = model(**inputs)
# Index of the largest logit along dim 1, copied to CPU as a NumPy array.
pred_labels = torch.argmax(outputs.logits, dim=1).cpu().numpy()
# Last entry of outputs.hidden_states, averaged over dims 2 and 3.
# presumably (batch, channels, height, width) -> (batch, channels) — TODO confirm
last_hidden_states = outputs.hidden_states[-1]
embeddings = torch.mean(last_hidden_states, (2, 3)).cpu().numpy()
@fjcasti1
fjcasti1 / CV_Classification_training.py
Created November 8, 2022 01:32
CV_Classification training
# Build the Trainer from objects defined outside this snippet. All arguments
# are passed by keyword, so their order here carries no meaning.
trainer = Trainer(
    # Model and run configuration.
    model=model,
    args=training_args,
    # Data splits.
    train_dataset=train_ds,
    eval_dataset=val_ds,
    # Batch assembly and preprocessing.
    data_collator=collate_fn,
    tokenizer=feature_extractor,
    # Evaluation metrics callback.
    compute_metrics=compute_metrics,
)
@fjcasti1
fjcasti1 / CV_Classification_collate_fn.py
Created November 8, 2022 01:31
CV_Classification collate fn
def collate_fn(dataset):
    """Collate a batch of examples into stacked tensors.

    Args:
        dataset: Iterable of examples. Each example is indexed with
            ``"pixel_values"`` (a tensor; all must share one shape so they
            can be stacked) and ``"label"``.

    Returns:
        A dict with ``"pixel_values"`` (the per-example tensors stacked along
        a new leading dimension) and ``"labels"`` (a tensor built from the
        per-example labels, in the same order).

    Raises:
        RuntimeError: Propagated from ``torch.stack`` when the batch is empty
            or the per-example tensors differ in shape.
    """
    # Materialize once: the examples are walked twice below, and a one-shot
    # iterable (e.g. a generator) would be exhausted after the first pass.
    examples = list(dataset)
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}
@fjcasti1
fjcasti1 / CV_Classification_compute_metrics.py
Created November 8, 2022 01:31
CV_Classification compute metrics
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (the reference labels) and
            ``predictions`` (either an array of logits, or a tuple whose
            first element is the logits).

    Returns:
        A dict with ``"accuracy"`` and ``"f1"`` (F1 with
        ``average="weighted"``).
    """
    labels = pred.label_ids
    predictions = pred.predictions
    # Unwrap only when the predictions really are a tuple of outputs;
    # indexing a bare logits array with [0] would select its first row.
    logits = predictions[0] if isinstance(predictions, tuple) else predictions
    preds = logits.argmax(-1)
    # f1_score and accuracy_score come from imports outside this snippet.
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}