Skip to content

Instantly share code, notes, and snippets.

@zredlined
zredlined / kdd_2024_synthetic_data_talk_notes.md
Created August 23, 2024 15:23
This page provides notes for my KDD 2024 talk called "NL2Code - Powering the Shift from Code to Conversation".
@zredlined
zredlined / evaluation_prompt.py
Created July 11, 2024 02:59
Evaluation prompt provided to Judge model to choose winning generation (AI Feedback)
prompt = f"""
Compare the quality of the following two instruction-response pairs:
Version 1 ({version1_name}):
Instruction: {version1['instruction']}
Context: {version1['context']}
Response: {version1['response']}
CUDA_VISIBLE_DEVICES=0 python train.py
--gpu_ids 0 --dataroot ./datasets/ebike_data
--name locations_FastCUT --CUT_mode FastCUT --n_epochs 30
@zredlined
zredlined / delete_unshared_projects.py
Created January 10, 2022 18:05
Delete any projects in Gretel that are not shared with more than one person
import logging
from gretel_client import configure_session, ClientConfig, projects
from gretel_client.helpers import do_api_call
logging.getLogger().setLevel(logging.INFO)
def clean_projects():
""" Delete any projects that are not shared to more than one person """
for project in projects.search_projects():
def plot_timeseries(filenames: list):
""" Plot a list of timeseries CSVs to a single graph """
combined_df = pd.DataFrame()
for filename in filenames:
df = pd.read_csv(filename)
df['date'] = pd.to_datetime(df['date'])
df['label'] = filename
combined_df = pd.concat([combined_df, df])
# Use the model to generate synthetic datasets
seed_data = json.loads(trends_df[seed_fields].to_json(orient="records"))
for dataset in range(5):
model.generate(max_invalid=1e5,
num_proc=1, # disable parallelism when using seed data
seed_fields=seed_data)
df = restore_daily(model.get_synthetic_df(), start, trend_col)
df.to_csv(f'synthetic-data-{dataset}.csv', index=False)
# Create the Gretel Synthetics Training / Model Configuration
from pathlib import Path
from gretel_helpers.synthetics import create_df, SyntheticDataBundle
config_template = {
"epochs": 50,
"early_stopping": False,
"vocab_size": 20,
"reset_states": False,
# Load and preview dataset
import datetime
import pandas as pd
import numpy as np
day = 24 * 60 * 60
year = 365.2425 * day
def load_dataframe() -> pd.DataFrame:
@zredlined
zredlined / setup_tensorflow_gpu.sh
Last active May 7, 2021 18:25
Install TensorFlow 2.4 with GPU support
#!/bin/bash
# Initialize conda environment
echo 'Create Conda env? Type y or n and then press [ENTER]:'
read create_env
if [ $create_env = "y" ];
then
echo "Provide name for Conda virtualenv and then press [ENTER]:"
read env_name