Kiko Castillo fjcasti1

## create_none_values_in_column_set.py
# CREATE NULL VALUES IN SOME ROWS FOR 1 OR MORE COLUMNS
# Picks N_rows random rows of the existing DataFrame `df` and sets the chosen
# columns to None in those rows. `df` is modified in place.
import random

N_rows = 10  # How many rows?
# Select columns to add Nones to (`...` is a placeholder: replace it with real column names)
col_list = ['col-1','col-2',...]
# random.sample draws row *positions*, whereas .loc selects by index *label*.
# Translate the sampled positions into labels so the intended rows are hit even
# when df does not carry the default 0..len(df)-1 index.
# random.sample raises ValueError when N_rows exceeds len(df).
index = df.index[random.sample(range(len(df)), N_rows)]
df.loc[index, col_list] = None

## create_none_col.py
# CREATE 1 OR MORE COLUMNS FULL OF NONES
# Assigns None to every column named in col_list on the existing DataFrame `df`.
# NOTE(review): the trailing `...` looks like a placeholder for further column
# names — replace it before running.
col_list = ['col-1','col-2',...]
df[col_list]=None

## CV_Classification_send_data.py
# Three parallel lists: position i in each one refers to the same environment.
env_names = ['training', 'validation', 'production']
environments = [Environments.TRAINING, Environments.VALIDATION, Environments.PRODUCTION]
dfs = [train_df, val_df, prod_df]

# Logging DataFrames
# Walks the three lists in lockstep and issues one arize_client.log call per DataFrame.
for env_name, env, df in zip(env_names, environments, dfs):
    response = arize_client.log(
        dataframe=df,
        model_id=model_id,
        model_version=model_version,
        # NOTE(review): the call is cut off here in this snippet — the remaining
        # arguments and the closing parenthesis are not shown.

## CV_Classification_define_schema.py
# No feature columns are declared in this snippet.
features = list()
# Column names, kept in this exact order.
arize_columns = [
    "prediction_id", "prediction_ts",
    "label", "pred_label",
    "image_vector", "url",
]

## CV_Classification_setup_client.py
# Arize credentials: replace both placeholder strings with the real values.
SPACE_KEY = "SPACE_KEY"
API_KEY = "API_KEY"
# Model identification used by the rest of the snippets.
model_id = "CV-demo-fashion-mnist-quality-drift"
model_version = "1.0"
model_type = ModelTypes.SCORE_CATEGORICAL
# Fail fast: refuse to build a client while either key still holds its
# placeholder value, so no misconfigured client object is left behind.
if SPACE_KEY == "SPACE_KEY" or API_KEY == "API_KEY":
    raise ValueError("❌ NEED TO CHANGE SPACE AND/OR API_KEY")
arize_client = Client(space_key=SPACE_KEY, api_key=API_KEY)
print("✅ Import and Setup Arize Client Done! Now we can start using Arize!")

## CV_Classification_map_labels_to_class_names.py
# Replace every value in the 'label' and 'pred_label' columns of each split
# with id2label[value]. Splits and columns are visited in the same order as
# the six explicit assignments this loop stands for.
for split_df in (train_df, val_df, prod_df):
    for column in ('label', 'pred_label'):
        split_df[column] = split_df[column].map(lambda label: id2label[label])

## CV_Clasification_postprocessing.py
def postprocess(batch):
    """Run the model on a batch of images and return predictions and embeddings.

    Args:
        batch: Mapping with an "image" entry holding an iterable of images;
            each image is converted to RGB before feature extraction.

    Returns:
        dict with two NumPy arrays:
            "pred_label": index of the largest logit along dim 1.
            "image_vector": last hidden state averaged over axes 2 and 3.
    """
    rgb_images = [image.convert("RGB") for image in batch["image"]]
    inputs = feature_extractor(rgb_images, return_tensors="pt").to(device)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)

    pred_labels = torch.argmax(outputs.logits, dim=1).cpu().numpy()

    # assumes the hidden state is (batch, channels, height, width), so that
    # averaging over axes 2 and 3 pools the spatial grid — TODO confirm
    last_hidden_states = outputs.hidden_states[-1]
    embeddings = torch.mean(last_hidden_states, (2, 3)).cpu().numpy()

    # Previously both arrays were computed and then dropped (implicit None).
    # NOTE(review): key names presumably match the 'pred_label' and
    # 'image_vector' columns used elsewhere in this file — verify against the caller.
    return {"pred_label": pred_labels, "image_vector": embeddings}

## CV_Classification_training.py
# Assemble the Trainer from the objects prepared by the other snippets.
trainer = Trainer(
    # what to train and with which arguments
    model=model,
    args=training_args,
    # data splits and how examples are batched
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=collate_fn,
    # preprocessing object and evaluation callback
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
)

## CV_Classification_collate_fn.py
def collate_fn(dataset):
    """Collate a sequence of examples into one batch dictionary.

    Args:
        dataset: Sequence of mappings, each with a "pixel_values" tensor and
            a "label" value.

    Returns:
        dict with "pixel_values" (the tensors stacked along a new first
        dimension) and "labels" (a tensor built from the label values).
    """
    batch = {}
    batch["pixel_values"] = torch.stack(
        tuple(example["pixel_values"] for example in dataset)
    )
    batch["labels"] = torch.tensor([example["label"] for example in dataset])
    return batch

## CV_Classification_compute_metrics.py
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for an evaluation result.

    Args:
        pred: Object exposing ``label_ids`` (reference labels) and
            ``predictions``; the first element of ``predictions`` is used as
            the score array (presumably the logits — confirm against caller).

    Returns:
        dict with keys "accuracy" and "f1".
    """
    y_true = pred.label_ids
    scores = pred.predictions[0]
    # The predicted class is the position of the largest score on the last axis.
    y_pred = scores.argmax(-1)
    weighted_f1 = f1_score(y_true, y_pred, average="weighted")
    return {"accuracy": accuracy_score(y_true, y_pred), "f1": weighted_f1}
	# CREATE NULL VALUES IN SOME ROWS FOR 1 OR MORE COLUMNS
	# Picks N_rows random rows of the existing DataFrame `df` and sets the chosen
	# columns to None in those rows. `df` is modified in place.
	import random
	N_rows = 10  # How many rows?
	# Select columns to add Nones to (`...` is a placeholder: replace it with real column names)
	col_list = ['col-1','col-2',...]
	# random.sample draws row *positions*, whereas .loc selects by index *label*.
	# Translate the sampled positions into labels so the intended rows are hit even
	# when df does not carry the default 0..len(df)-1 index.
	# random.sample raises ValueError when N_rows exceeds len(df).
	index = df.index[random.sample(range(len(df)), N_rows)]
	df.loc[index, col_list] = None
	# CREATE 1 OR MORE COLUMNS FULL OF NONES
	# Assigns None to every column named in col_list on the existing DataFrame `df`.
	# NOTE(review): the trailing `...` looks like a placeholder for further column
	# names — replace it before running.
	col_list = ['col-1','col-2',...]
	df[col_list]=None
	# Three parallel lists: position i in each one refers to the same environment.
	env_names = ['training', 'validation', 'production']
	environments = [Environments.TRAINING, Environments.VALIDATION, Environments.PRODUCTION]
	dfs = [train_df, val_df, prod_df]

	# Logging DataFrames
	# Walks the three lists in lockstep; the statement that follows is
	# presumably meant to be the loop body.
	# NOTE(review): the line after `for` is not indented beneath it, so this
	# does not parse as written — confirm the intended nesting.
	for env_name, env, df in zip(env_names, environments, dfs):
	response = arize_client.log(
	dataframe=df,
	model_id=model_id,
	model_version=model_version,
	# NOTE(review): the call is cut off here in this snippet — the remaining
	# arguments and the closing parenthesis are not shown.
	# No feature columns are declared in this snippet.
	features = list()
	# Column names, kept in this exact order.
	arize_columns = [
	    "prediction_id", "prediction_ts",
	    "label", "pred_label",
	    "image_vector", "url",
	]
	# Arize credentials: replace both placeholder strings with the real values.
	SPACE_KEY = "SPACE_KEY"
	API_KEY = "API_KEY"
	# Model identification used by the rest of the snippets.
	model_id = "CV-demo-fashion-mnist-quality-drift"
	model_version = "1.0"
	model_type = ModelTypes.SCORE_CATEGORICAL
	# Fail fast: refuse to build a client while either key still holds its
	# placeholder value, so no misconfigured client object is left behind.
	# The branch body is indented again; it had been flattened onto the same
	# level as the `if`, which does not parse.
	if SPACE_KEY == "SPACE_KEY" or API_KEY == "API_KEY":
	    raise ValueError("❌ NEED TO CHANGE SPACE AND/OR API_KEY")
	arize_client = Client(space_key=SPACE_KEY, api_key=API_KEY)
	print("✅ Import and Setup Arize Client Done! Now we can start using Arize!")
	# Replace every value in the 'label' and 'pred_label' columns of each split
	# with id2label[value]. Splits and columns are visited in the same order as
	# the six explicit assignments this loop stands for.
	for split_df in (train_df, val_df, prod_df):
	    for column in ('label', 'pred_label'):
	        split_df[column] = split_df[column].map(lambda label: id2label[label])
	def postprocess(batch):
	    """Run the model on a batch of images and return predictions and embeddings.

	    Args:
	        batch: Mapping with an "image" entry holding an iterable of images;
	            each image is converted to RGB before feature extraction.

	    Returns:
	        dict with two NumPy arrays:
	            "pred_label": index of the largest logit along dim 1.
	            "image_vector": last hidden state averaged over axes 2 and 3.
	    """
	    # The function body and the `with` body are indented again; they had been
	    # flattened onto the same level as `def`, which does not parse.
	    inputs = feature_extractor([x.convert("RGB") for x in batch["image"]], return_tensors="pt").to(device)
	    # Inference only, so gradient tracking is switched off.
	    with torch.no_grad():
	        outputs = model(**inputs)

	    pred_labels = torch.argmax(outputs.logits, dim=1).cpu().numpy()

	    # assumes the hidden state is (batch, channels, height, width), so that
	    # averaging over axes 2 and 3 pools the spatial grid — TODO confirm
	    last_hidden_states = outputs.hidden_states[-1]
	    embeddings = torch.mean(last_hidden_states, (2, 3)).cpu().numpy()

	    # Previously both arrays were computed and then dropped (implicit None).
	    # NOTE(review): key names presumably match the 'pred_label' and
	    # 'image_vector' columns used elsewhere in this file — verify against the caller.
	    return {"pred_label": pred_labels, "image_vector": embeddings}
	# Assemble the Trainer from the objects prepared by the other snippets.
	trainer = Trainer(
	    # what to train and with which arguments
	    model=model,
	    args=training_args,
	    # data splits and how examples are batched
	    train_dataset=train_ds,
	    eval_dataset=val_ds,
	    data_collator=collate_fn,
	    # preprocessing object and evaluation callback
	    tokenizer=feature_extractor,
	    compute_metrics=compute_metrics,
	)
	def collate_fn(dataset):
	    """Collate a sequence of examples into one batch dictionary.

	    Args:
	        dataset: Sequence of mappings, each with a "pixel_values" tensor and
	            a "label" value.

	    Returns:
	        dict with "pixel_values" (the tensors stacked along a new first
	        dimension) and "labels" (a tensor built from the label values).
	    """
	    # The body is indented again; it had been flattened onto the same level
	    # as `def`, which does not parse.
	    pixel_values = torch.stack([ds["pixel_values"] for ds in dataset])
	    labels = torch.tensor([ds["label"] for ds in dataset])
	    return {"pixel_values": pixel_values, "labels": labels}
	def compute_metrics(pred):
	    """Compute accuracy and weighted F1 for an evaluation result.

	    Args:
	        pred: Object exposing ``label_ids`` (reference labels) and
	            ``predictions``; the first element of ``predictions`` is used as
	            the score array (presumably the logits — confirm against caller).

	    Returns:
	        dict with keys "accuracy" and "f1".
	    """
	    # The body is indented again; it had been flattened onto the same level
	    # as `def`, which does not parse.
	    labels = pred.label_ids
	    # The predicted class is the position of the largest score on the last axis.
	    preds = pred.predictions[0].argmax(-1)
	    f1 = f1_score(labels, preds, average="weighted")
	    acc = accuracy_score(labels, preds)
	    return {"accuracy": acc, "f1": f1}