import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import lightning as L
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from lightning.pytorch.callbacks import ModelCheckpoint
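
The gist preview stops at the imports. A minimal sketch of how these pieces typically fit together with PyTorch Lightning follows; the MNIST data, the tiny model, and the hyperparameters are assumptions rather than the original gist's code.

# Minimal LightningModule sketch (assumed architecture, not the original gist)
class LitClassifier(L.LightningModule):
    def __init__(self, lr=1e-3):
        super().__init__()
        self.net = nn.Sequential(
            nn.Flatten(), nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 10)
        )
        self.lr = lr

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = F.cross_entropy(self.net(x), y)
        self.log("train_loss", loss)  # logged metric monitored by the checkpoint callback
        return loss

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=self.lr)

# Assumed dataset and training setup
train_ds = datasets.MNIST("data", train=True, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
checkpoint_cb = ModelCheckpoint(monitor="train_loss", save_top_k=1)
trainer = L.Trainer(max_epochs=3, callbacks=[checkpoint_cb])
trainer.fit(LitClassifier(), train_loader)
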
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
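
This preview also cuts off after the setup. A sketch of the TensorBoard logging pattern these imports point to is below; the model, dataset, and log directory are assumptions, not the original gist's code.

# Minimal TensorBoard logging sketch; "runs/demo" is a hypothetical log directory
writer = SummaryWriter("runs/demo")

transform = transforms.ToTensor()
train_set = torchvision.datasets.FashionMNIST("data", train=True, download=True, transform=transform)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

# Assumed model and optimizer
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

for step, (images, labels) in enumerate(train_loader):
    images, labels = images.to(device), labels.to(device)
    optimizer.zero_grad()
    loss = criterion(model(images), labels)
    loss.backward()
    optimizer.step()
    writer.add_scalar("Loss/train", loss.item(), step)  # log each batch loss to TensorBoard
    if step == 100:  # short demo run
        break
writer.close()
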
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import gradio as gr
# Load the diamonds dataset
diamonds = sns.load_dataset("diamonds")
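
The rest of this gist is cut off. A sketch of how these imports commonly combine into a price-prediction demo follows; the feature columns, model settings, and the Gradio interface are assumptions, not the original gist's code.

# Assumed feature/target split for a diamond price model
X = diamonds.drop(columns="price")
y = diamonds["price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

numeric = ["carat", "depth", "table", "x", "y", "z"]
categorical = ["cut", "color", "clarity"]
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numeric),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
])
model = Pipeline([
    ("prep", preprocessor),
    ("rf", RandomForestRegressor(n_estimators=100, random_state=42)),
])
model.fit(X_train, y_train)

def predict_price(carat, cut, color, clarity, depth, table, x, y_, z):
    # Build a one-row DataFrame so the pipeline sees the expected columns
    row = pd.DataFrame([{"carat": carat, "cut": cut, "color": color, "clarity": clarity,
                         "depth": depth, "table": table, "x": x, "y": y_, "z": z}])
    return float(model.predict(row)[0])

demo = gr.Interface(
    fn=predict_price,
    inputs=[gr.Number(), gr.Text(), gr.Text(), gr.Text(),
            gr.Number(), gr.Number(), gr.Number(), gr.Number(), gr.Number()],
    outputs=gr.Number(),
)
demo.launch()
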
import json
import pandas as pd
from joblib import dump
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
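
Again the preview stops at the imports. Below is a sketch under the assumption that this gist trains an SGDRegressor on a CSV, reports MSE, and persists the fitted pipeline; the file paths, target column, and the extra ColumnTransformer import are assumptions.

from sklearn.compose import ColumnTransformer  # extra import used only by this sketch

# Hypothetical CSV with a numeric target column "price"
data = pd.read_csv("diamonds.csv")
X, y = data.drop(columns="price"), data["price"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale numeric columns, one-hot encode the rest, then fit a linear model with SGD
numeric = X.select_dtypes("number").columns.tolist()
categorical = X.select_dtypes(exclude="number").columns.tolist()
pipeline = Pipeline([
    ("prep", ColumnTransformer([
        ("num", StandardScaler(), numeric),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
    ])),
    ("model", SGDRegressor(random_state=42)),
])
pipeline.fit(X_train, y_train)

mse = mean_squared_error(y_test, pipeline.predict(X_test))
dump(pipeline, "model.joblib")  # persist the fitted pipeline (hypothetical path)
with open("metrics.json", "w") as f:
    json.dump({"mse": float(mse)}, f)  # cast to plain float so json can serialize it
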
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
def preprocess_data(data_path, test_size=0.2, target_name="price"):
    """
    Loads data, splits into train/test, performs normalization and one-hot encoding,
    and saves the preprocessed data with targets as CSV files.
    """
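    # The gist preview cuts off here. What follows is a minimal sketch of a body
    # matching the docstring, not the original gist's code; the random_state and the
    # train.csv/test.csv output names are assumptions. (The gist also imports
    # OrdinalEncoder, presumably for ordered categoricals, which this sketch skips.)
    data = pd.read_csv(data_path)
    train, test = train_test_split(data, test_size=test_size, random_state=42)

    num_cols = train.drop(columns=target_name).select_dtypes("number").columns
    cat_cols = train.drop(columns=target_name).select_dtypes(exclude="number").columns

    # Fit transformers on the training split only to avoid leakage
    scaler = StandardScaler().fit(train[num_cols])
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False).fit(train[cat_cols])  # sklearn >= 1.2

    def transform(split):
        num = pd.DataFrame(scaler.transform(split[num_cols]), columns=num_cols)
        cat = pd.DataFrame(encoder.transform(split[cat_cols]),
                           columns=encoder.get_feature_names_out(cat_cols))
        out = pd.concat([num, cat], axis=1)
        out[target_name] = split[target_name].to_numpy()
        return out

    transform(train).to_csv("train.csv", index=False)
    transform(test).to_csv("test.csv", index=False)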
import pandas as pd
from xgboost import XGBClassifier
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from google.cloud import storage
import joblib
# Path to your CSV file in GCS bucket
gcs_path = "gs://vertex-tutorial-bucket-bex"
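
The remainder of this gist is truncated. A sketch of a typical flow for these imports is below: read training data from GCS, fit an XGBClassifier, and upload the model artifact back to the bucket. The train.csv object name, the "target" label column, and the output blob path are assumptions.

# Reading gs:// paths via pandas requires the gcsfs package;
# this assumes a train.csv object sits at the bucket root
df = pd.read_csv(f"{gcs_path}/train.csv")

# Assumed label column; encode string labels to integers for XGBoost
le = LabelEncoder()
y = le.fit_transform(df["target"])
X = df.drop(columns="target")  # assuming the remaining feature columns are numeric

model = XGBClassifier(n_estimators=100)
model.fit(X, y)

# Save locally, then upload to the bucket with the GCS client
local_path = Path("model.joblib")
joblib.dump(model, local_path)

client = storage.Client()
bucket = client.bucket("vertex-tutorial-bucket-bex")
bucket.blob("model/model.joblib").upload_from_filename(str(local_path))
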
| Feature | Jupyter Notebooks | Databricks Notebooks |
|---|---|---|
| Platform | Open-source; runs locally or on cloud platforms | Exclusive to the Databricks platform |
| Collaboration and Sharing | Limited collaboration features; manual sharing | Built-in collaboration; real-time concurrent editing |
| Execution | Relies on local or external servers | Execution on Databricks clusters |
| Integration with Big Data | Can be integrated with Spark; requires additional configuration | Native integration with Apache Spark; optimized for big data |
| Built-in Features | External tools/extensions for version control, collaboration, and visualization | Integrated with Databricks-specific features like Delta Lake; built-in support for collaboration and analytics tools |
| Cost and Scaling | Local installations are often free; cloud-based solutions may have costs | Paid service; costs depend on usage; scales seamlessly with Databricks clusters |
| Ease of Use | Familiar and widely used in the data science community | |

import pandas as pd
import seaborn as sns

# Load the dataset from Seaborn
diamonds = sns.load_dataset("diamonds")

# Create a Pandas DataFrame (load_dataset already returns one, so this copy is optional)
df = pd.DataFrame(diamonds)

# Save the DataFrame directly as a Parquet file (requires pyarrow or fastparquet)
df.to_parquet("diamonds.parquet")  # example output path
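
For reference, the file can be loaded back with the symmetric pandas call, again assuming pyarrow or fastparquet is installed:

df = pd.read_parquet("diamonds.parquet")  # round-trip the saved file
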
This is a test gist.