Skip to content

Instantly share code, notes, and snippets.

@preetesh33
Created June 4, 2024 16:18
Show Gist options
  • Save preetesh33/581c98a757df6007fa94767f94ec30ed to your computer and use it in GitHub Desktop.
Save preetesh33/581c98a757df6007fa94767f94ec30ed to your computer and use it in GitHub Desktop.
import pandas as pd
import pandas as pd
from joblib import load
import json
from datetime import datetime
import warnings
import logging
import yaml
# Silence FutureWarning and DeprecationWarning for the rest of the run
# (filters are registered in the same order as before).
for _category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_category)
def load_config(file_path):
    """Read a YAML file and return the parsed configuration.

    Args:
        file_path (str): The path to the YAML file.

    Returns:
        dict | None: Whatever ``yaml.safe_load`` produces for the file
        (expected to be a dictionary), or ``None`` when the file is missing
        or cannot be read/parsed. In the failure case the error is printed
        and logged instead of being raised.
    """
    # Resolve the root logger at call time. The module-level ``logger`` is
    # only created after this function has already been called, so relying
    # on that global here would raise NameError inside the error handlers.
    log = logging.getLogger()
    try:
        with open(file_path, 'r') as file:
            return yaml.safe_load(file)
    except FileNotFoundError as e:
        print(f"File {file_path} not found, error: {e}")
        log.error("File %s not found, error: %s", file_path, e)
    except Exception as e:
        # Best-effort boundary: report any other read/parse failure.
        print(f"Error loading config file: {e}")
        log.error("Error loading config file: %s", e)
    # Explicit so callers can see the failure contract.
    return None
# Load the run configuration. load_config() returns None when the file is
# missing or unreadable, so stop with a clear message instead of failing
# later with "'NoneType' object is not subscriptable".
config = load_config('etc/config.yaml')
if config is None:
    raise SystemExit("Could not load configuration from 'etc/config.yaml'")

filename = config['data']['filename']
log_filename = config['logging']['log_filename']
# Keep both ends of the configured date range; previously the start date was
# overwritten by the end date because both were assigned to `dates`.
start_date = config['dates']['start_date']
end_date = config['dates']['end_date']
dates = end_date  # preserves the value `dates` used to end up with

#### logging ####
# Root logger writes "<timestamp> <message>" lines to the configured file
# (default append mode) at INFO level and above.
logfilename = log_filename
logging.basicConfig(filename=logfilename, format='%(asctime)s %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Time window used to select the rows handed to the model for prediction.
# NOTE(review): the window is hard-coded here even though config['dates'] is
# read earlier in this script - confirm which source is intended.
start_time = '2024-02-16'
end_time = '2024-02-17'

# JSON files holding the integer-code -> label mapping for each category column
location_json = 'location_mapping.json'
customer_json = 'customer_mapping.json'
cluster_json = 'cluster_mapping.json'
project_json = 'project_mapping.json'

# Load the data file
# NOTE(review): the path is hard-coded although config['data']['filename'] is
# read earlier in this script - confirm which source is intended.
df_grouped = pd.read_csv('ml_data_new_completed_startdate_1711450276.csv')

# Convert 'timestamp' to datetime so it can be compared with the window bounds
df_grouped['timestamp'] = pd.to_datetime(df_grouped['timestamp'])

# Convert the object columns to the pandas category dtype
_category_columns = ['location', 'customer', 'cluster', 'project']
for _column in _category_columns:
    df_grouped[_column] = df_grouped[_column].astype('category')

# Code -> label dictionaries derived from this data file. They must be built
# before the columns are replaced by their integer codes below.
location_mapping_dict = dict(enumerate(df_grouped['location'].cat.categories))
customer_mapping_dict = dict(enumerate(df_grouped['customer'].cat.categories))
cluster_mapping_dict = dict(enumerate(df_grouped['cluster'].cat.categories))
project_mapping_dict = dict(enumerate(df_grouped['project'].cat.categories))

# Replace each category column with its integer codes
for _column in _category_columns:
    df_grouped[_column] = df_grouped[_column].cat.codes

# Keep only rows inside [start_time, end_time] (both bounds inclusive).
# .copy() makes the filtered frame independent of the full frame, so adding
# 'id_col' below is a plain column assignment rather than a write to a slice
# (which is what triggers pandas' SettingWithCopyWarning).
_in_window = (df_grouped['timestamp'] >= start_time) & (df_grouped['timestamp'] <= end_time)
df_grouped = df_grouped[_in_window].copy()
print(df_grouped.head(10))

# Series identifier: the four integer codes joined with '_'
df_grouped['id_col'] = (
    df_grouped['location'].astype(str)
    + '_' + df_grouped['customer'].astype(str)
    + '_' + df_grouped['cluster'].astype(str)
    + '_' + df_grouped['project'].astype(str)
)
# Restore the previously saved model object from disk
model = load('test_train_1.joblib')

# Ask the model for predictions for every distinct series id in the window,
# handing it the prepared frame as its dynamic data.
_series_ids = df_grouped['id_col'].unique().tolist()
pred = model.predict(horizon=1440, dynamic_dfs=[df_grouped], ids=_series_ids)

# Break 'id_col' back into its four '_'-separated parts, one column each
_id_parts = pred['id_col'].str.split('_', expand=True)
pred[['location', 'customer', 'cluster', 'project']] = _id_parts
print(pred.head(10))
def _load_code_mapping(path):
    """Load a code -> label mapping from a JSON file.

    JSON object keys are always strings, so they are converted back to the
    integer codes used in the prediction frame. The file is opened in a
    ``with`` block so the handle is closed as soon as it has been read.

    Args:
        path (str): Path to the JSON mapping file.

    Returns:
        dict: Mapping of integer code to the stored label.
    """
    with open(path) as mapping_file:
        raw_mapping = json.load(mapping_file)
    return {int(code): label for code, label in raw_mapping.items()}


# Load the code -> label mappings, keyed by integer code
# NOTE(review): these come from JSON files on disk, not from the dictionaries
# built from the data file earlier in this script - confirm the two agree.
location_mapping_json = _load_code_mapping(location_json)
customer_mapping_json = _load_code_mapping(customer_json)
cluster_mapping_json = _load_code_mapping(cluster_json)
project_mapping_json = _load_code_mapping(project_json)

# The id parts split out of 'id_col' are strings: cast each one to int, then
# translate the code back to its original category label.
_column_mappings = (
    ('location', location_mapping_json),
    ('customer', customer_mapping_json),
    ('cluster', cluster_mapping_json),
    ('project', project_mapping_json),
)
for _column, _mapping in _column_mappings:
    pred[_column] = pred[_column].astype(int).map(_mapping)

print("pred",pred.head(10))
# Persist the decoded predictions without the DataFrame index
pred.to_csv('prediction_from_model_3_1.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment