Skip to content

Instantly share code, notes, and snippets.

@preetesh33
Created June 4, 2024 16:20
Show Gist options
  • Save preetesh33/dafe683408d67d2fd3d78a243bb87c67 to your computer and use it in GitHub Desktop.
Save preetesh33/dafe683408d67d2fd3d78a243bb87c67 to your computer and use it in GitHub Desktop.
import pandas as pd
import pandas as pd
from joblib import load
import json
from datetime import datetime
import warnings
# Silence warning classes that would otherwise clutter the script's output.
for _ignored_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)
# This one is matched by message text rather than by category.
warnings.filterwarnings("ignore", message="Found null values in totalcalls.")

# Bounds of the timestamp window used to filter the input data
# (the filter further down compares with strict > and <).
start_time = '2024-02-16'
end_time = '2024-02-17'

# JSON files that map integer category codes back to the original labels,
# one file per encoded column.
location_json = 'location_mapping.json'
customer_json = 'customer_mapping.json'
cluster_json = 'cluster_mapping.json'
project_json = 'project_mapping.json'
# Load the merged dataset and parse its 'timestamp' column into datetimes.
df_grouped = pd.read_csv('/opt/ML/final/data_augmentation/augmentation/merged_main.csv')
df_grouped['timestamp'] = pd.to_datetime(df_grouped['timestamp'])

# The four label columns that are encoded and later joined into the series id.
_label_columns = ('location', 'customer', 'cluster', 'project')

# Cast every label column to the pandas categorical dtype.
for _column in _label_columns:
    df_grouped[_column] = df_grouped[_column].astype('category')

# Record, per column, which integer code stands for which original label.
# This is done on the full (not yet filtered) data, before the labels are
# overwritten by their codes below.
location_mapping_dict = {
    code: label for code, label in enumerate(df_grouped['location'].cat.categories)
}
customer_mapping_dict = {
    code: label for code, label in enumerate(df_grouped['customer'].cat.categories)
}
cluster_mapping_dict = {
    code: label for code, label in enumerate(df_grouped['cluster'].cat.categories)
}
project_mapping_dict = {
    code: label for code, label in enumerate(df_grouped['project'].cat.categories)
}

# Replace each label with its integer category code.
for _column in _label_columns:
    df_grouped[_column] = df_grouped[_column].cat.codes

# Keep only rows strictly inside the window: both bounds are exclusive.
_after_start = df_grouped['timestamp'] > start_time
_before_end = df_grouped['timestamp'] < end_time
df_grouped = df_grouped[_after_start & _before_end]
print("train head",df_grouped.head(10))
print("train tail",df_grouped.tail(10))

# Number of rows left after filtering.
_horizon = len(df_grouped)

# Series identifier: the four integer codes joined with underscores,
# in the order location, customer, cluster, project.
df_grouped['id_col'] = (
    df_grouped['location'].astype(str)
    + '_'
    + df_grouped['customer'].astype(str)
    + '_'
    + df_grouped['cluster'].astype(str)
    + '_'
    + df_grouped['project'].astype(str)
)
# Load the serialized model object from disk.
model = load('test_train_1.joblib')

# Request predictions only for the series ids present in the filtered frame,
# passing that same frame as the single entry of dynamic_dfs.
# NOTE(review): horizon is fixed at 1440 -- presumably one day of per-minute
# steps; confirm against how the model was trained.
_series_ids = df_grouped['id_col'].unique().tolist()
pred = model.predict(horizon=1440,dynamic_dfs=[df_grouped], ids=_series_ids)

# Break the underscore-joined id back into its four code columns
# (the values are still strings at this point).
_id_parts = pred['id_col'].str.split('_', expand=True)
pred[['location', 'customer', 'cluster', 'project']] = _id_parts
print("prediction head",pred.head(10))
# Decode the integer category codes in the predictions back to their original
# labels using the code -> label JSON mappings, then write the result to CSV.
def _load_code_mapping(path):
    """Return the code -> label mapping stored in the JSON file at *path*.

    JSON object keys are always strings, so they are converted back to the
    integer category codes that appear in the prediction frame.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the file is not valid JSON or a key is not an integer.
    """
    # The context manager closes the file handle as soon as parsing is done,
    # instead of leaving it open until garbage collection.
    with open(path) as mapping_file:
        raw_mapping = json.load(mapping_file)
    return {int(code): label for code, label in raw_mapping.items()}


location_mapping_json = _load_code_mapping(location_json)
customer_mapping_json = _load_code_mapping(customer_json)
cluster_mapping_json = _load_code_mapping(cluster_json)
project_mapping_json = _load_code_mapping(project_json)

# The code columns are strings here; cast each to int so it matches the
# mapping keys, then substitute the original label for every code.
for _column, _code_to_label in (
    ('location', location_mapping_json),
    ('customer', customer_mapping_json),
    ('cluster', cluster_mapping_json),
    ('project', project_mapping_json),
):
    pred[_column] = pred[_column].astype(int).map(_code_to_label)

print("final pred data",pred.head(10))
# The file name previously read 'test_pred_1,csv' (comma instead of a dot),
# which produced an output file without a .csv extension.
pred.to_csv('test_pred_1.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment