Skip to content

Instantly share code, notes, and snippets.

View tanaymeh's full-sized avatar
🏖️
Working Remotely

Tanay Mehta tanaymeh

🏖️
Working Remotely
View GitHub Profile
@tanaymeh
tanaymeh / fused_gelu.cu
Created August 2, 2025 14:28
Simple Fused CUDA Kernel
extern "C" {
#include <cuda_runtime.h>
#include <math.h>
/*
 * GELU activation using the tanh approximation:
 *   gelu(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
 *
 * x:       input value (single precision).
 * returns: the approximated GELU of x (single precision).
 */
__device__ __forceinline__
float gelu(float x){
    // This constant is sqrt(2/pi) ~= 0.79788456, NOT 2/pi (which is ~= 0.63661977).
    const float sqrt_two_over_pi = 0.7978845608028654f;
    // Argument of tanh; the cubic term is kept as x * x * x exactly as before.
    const float inner = sqrt_two_over_pi * (x + 0.044715f * x * x * x);
    return 0.5f * x * (1.0f + tanhf(inner));
}
import lance
import pyarrow as pa
from tqdm.auto import tqdm
import datasets
from transformers import AutoTokenizer
# We'll be using the GPT-NeoX tokenizer (the "EleutherAI/gpt-neox-20b" checkpoint)
# for tokenizing the code files
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
# The two date columns are redundant at this point; remove them in place,
# one column per call.
new_df_test.drop(columns=['start_date'], inplace=True)
new_df_test.drop(columns=['end_date'], inplace=True)
# Show the first rows of the final dataset
new_df_test.head()
# Let's now extract the weekdays in a similar way to what we used before.
# Each entry of start_time / end_time is parsed with the DATETIME format and
# reduced to its weekday index (datetime.weekday(): Monday == 0 ... Sunday == 6).
week_start = [
    datetime.datetime.strptime(stamp, DATETIME).weekday()
    for stamp in start_time
]
week_end = [
    datetime.datetime.strptime(stamp, DATETIME).weekday()
    for stamp in end_time
]
# Let's get the duration of the trip in seconds and then we will get the weekdays
# Pull the start/end date columns out of the frame as plain value arrays
start_time = new_df_test['start_date'].values
end_time = new_df_test['end_date'].values
# strptime format used to parse the date strings
DATETIME = '%Y-%m-%d %H:%M:%S'
duration = list()
for i in range(len(start_time)):
# Difference between the i-th end and start timestamps (a datetime.timedelta).
# NOTE(review): the visible snippet stops after this line -- nothing shown here
# converts `difference` to seconds or appends it to `duration`; confirm against
# the full source.
difference = datetime.datetime.strptime(end_time[i], DATETIME) - datetime.datetime.strptime(start_time[i], DATETIME)
# Now encode each categorical column based on its category names.
# Columns are processed in the same order as before: train_type, train_class, fare.
for column, names in (
    ('train_type', train_type_names),
    ('train_class', train_class_names),
    ('fare', fare_names),
):
    new_df_test = replace_small_categorical_data(
        new_df_test,
        column_name=column,
        categorical_names=names,
    )
# First get category names.
# get_category_names is unpacked as a pair for each column; the first element
# is discarded and only the second (the names) is kept.
_, train_type_names = get_category_names(new_df_test, column_name='train_type')
_, train_class_names = get_category_names(new_df_test, column_name='train_class')
_, fare_names = get_category_names(new_df_test, column_name='fare')
# Bar chart of the value counts of the 'fare' column
new_df_test['fare'].value_counts().plot.bar()
plt.xlabel('Train Fares')
plt.ylabel('Number of trains')
plt.title('Distribution of Train Fare')
# Symmetric-log y axis (presumably so small counts stay readable next to large ones)
plt.yscale('symlog')
# Resize the current figure to 15.5 x 5.5 inches
fig = plt.gcf()
fig.set_size_inches(15.5, 5.5)
# Bar chart of the value counts of the 'train_class' column
new_df_test['train_class'].value_counts().plot.bar()
plt.xlabel('Train Classes')
plt.ylabel('Number of trains')
plt.title('Distribution of Train Classes')
# Symmetric-log y axis (presumably so small counts stay readable next to large ones)
plt.yscale('symlog')
# Resize the current figure to 15.5 x 5.5 inches
fig = plt.gcf()
fig.set_size_inches(15.5, 5.5)
# Bar chart of the value counts of the 'train_type' column
new_df_test['train_type'].value_counts().plot.bar()
plt.xlabel('Train Types')
plt.ylabel('Number of trains')
plt.title('Distribution of Train Types')
# Symmetric-log y axis (presumably so small counts stay readable next to large ones)
plt.yscale('symlog')
# Resize the current figure to 15.5 x 5.5 inches
fig = plt.gcf()
fig.set_size_inches(15.5, 5.5)