# preetesh33/pred

## pred
import pandas as pd
import pandas as pd
from joblib import load
import json
from datetime import datetime
import warnings
import logging
import yaml


# Install global "ignore" filters for the two noisy warning categories
for _ignored_category in (FutureWarning, DeprecationWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

def load_config(file_path):
    """Read a YAML file and return the parsed configuration.

    Failures are reported on stdout and through the root logger instead of
    being raised, so callers must check the result for ``None``.

    Args:
        file_path (str): The path to the yaml file

    Returns:
        dict | None: Whatever ``yaml.safe_load`` produces for the file
        (a dictionary for a mapping document), or ``None`` when the file
        is missing or cannot be read/parsed.
    """
    # Look the logger up here rather than using the module-level ``logger``:
    # this function runs before that name is assigned, so the error paths
    # used to die with NameError and hide the real problem.
    # ``getLogger().error`` (unlike ``logging.error``) does not implicitly
    # call ``basicConfig``, so a later file-based setup still takes effect.
    log = logging.getLogger()
    try:
        with open(file_path, 'r') as file:
            return yaml.safe_load(file)
    except FileNotFoundError as e:
        print(f"File {file_path} not found, error: {e}")
        log.error(f"File {file_path} not found, error: {e}")
    except Exception as e:
        # Deliberately broad: any read/parse failure is reported, not raised.
        print(f"Error loading config file: {e}")
        log.error(f"Error loading config file: {e}")
    return None

config = load_config('etc/config.yaml')
if config is None:
    # load_config reports failures and returns None; stop with a clear
    # message instead of failing on the first subscript below.
    raise RuntimeError("Could not load configuration from 'etc/config.yaml'")

# NOTE(review): ``filename`` and the date bounds are read here, but the rest
# of the script uses hard-coded values - confirm which source is intended.
filename = config['data']['filename']
log_filename = config['logging']['log_filename']
# Keep the two bounds under separate names; both used to be assigned to
# ``dates``, so the start date was immediately overwritten by the end date.
start_date = config['dates']['start_date']
end_date = config['dates']['end_date']
dates = end_date  # retained: the value the old double assignment left behind


#### logging ####
logfilename = log_filename
# Records are appended to the log file (basicConfig's default file mode).
logging.basicConfig(filename=logfilename, format='%(asctime)s %(message)s')
logger = logging.getLogger()
logger.setLevel(logging.INFO)  # switch to logging.DEBUG for more detail

# Time window of input rows used for prediction (both bounds inclusive)
start_time = '2024-02-16'
end_time = '2024-02-17'

# JSON files holding the integer-code -> label mapping for each category
location_json = 'location_mapping.json'
customer_json = 'customer_mapping.json'
cluster_json = 'cluster_mapping.json'
project_json = 'project_mapping.json'

# Load the data file
df_grouped = pd.read_csv('ml_data_new_completed_startdate_1711450276.csv')

# Convert 'timestamp' to datetime
df_grouped['timestamp'] = pd.to_datetime(df_grouped['timestamp'])

# Columns that are label-encoded and later joined into the series id
category_columns = ['location', 'customer', 'cluster', 'project']

# Convert object columns to category type
for column in category_columns:
    df_grouped[column] = df_grouped[column].astype('category')

# Code -> label dictionaries derived from the categories found in this CSV.
# NOTE(review): these are not used further down; decoding relies on the JSON
# mapping files instead. The codes assigned here only agree with those files
# if both were built from the same set of categories - confirm.
location_mapping_dict = dict(enumerate(df_grouped['location'].cat.categories))
customer_mapping_dict = dict(enumerate(df_grouped['customer'].cat.categories))
cluster_mapping_dict = dict(enumerate(df_grouped['cluster'].cat.categories))
project_mapping_dict = dict(enumerate(df_grouped['project'].cat.categories))

# Replace each category with its integer code
for column in category_columns:
    df_grouped[column] = df_grouped[column].cat.codes

# Keep only rows inside the window. ``.copy()`` makes the result an
# independent frame, so adding ``id_col`` below is not a chained assignment
# on a slice (which triggers SettingWithCopyWarning).
# NOTE(review): ``end_time`` is compared as midnight, so rows after
# 2024-02-17 00:00:00 are excluded - confirm that is the intended window.
in_window = (df_grouped['timestamp'] >= start_time) & (df_grouped['timestamp'] <= end_time)
df_grouped = df_grouped[in_window].copy()

print(df_grouped.head(10))

# Series id: the four encoded columns joined with '_'
df_grouped['id_col'] = (
    df_grouped['location'].astype(str) + '_'
    + df_grouped['customer'].astype(str) + '_'
    + df_grouped['cluster'].astype(str) + '_'
    + df_grouped['project'].astype(str)
)

# Deserialize the model from disk
model = load('test_train_1.joblib')

# Predict 1440 steps ahead for every distinct series id in the input frame
series_ids = df_grouped.id_col.unique().tolist()
pred = model.predict(horizon=1440, dynamic_dfs=[df_grouped], ids=series_ids)

# Break the composite id back into its four encoded component columns
id_parts = pred['id_col'].str.split('_', expand=True)
pred[['location', 'customer', 'cluster', 'project']] = id_parts
print(pred.head(10))

def _load_code_mapping(path):
    """Return the ``{int code: label}`` mapping stored in the JSON file *path*.

    JSON object keys are always strings, so they are converted back to the
    integer codes. The ``with`` block closes the file as soon as it is read
    (the previous bare ``open`` calls never closed their handles).
    """
    with open(path) as mapping_file:
        return {int(k): v for k, v in json.load(mapping_file).items()}


# Load the code -> label mapping for each category column
location_mapping_json = _load_code_mapping(location_json)
customer_mapping_json = _load_code_mapping(customer_json)
cluster_mapping_json = _load_code_mapping(cluster_json)
project_mapping_json = _load_code_mapping(project_json)

# The split id parts are strings: cast them to integer codes, then map each
# code back to its original category label
pred['location'] = pred['location'].astype(int).map(location_mapping_json)
pred['customer'] = pred['customer'].astype(int).map(customer_mapping_json)
pred['cluster'] = pred['cluster'].astype(int).map(cluster_mapping_json)
pred['project'] = pred['project'].astype(int).map(project_mapping_json)

print("pred", pred.head(10))
pred.to_csv('prediction_from_model_3_1.csv', index=False)
	import pandas as pd
	import pandas as pd
	from joblib import load
	import json
	from datetime import datetime
	import warnings
	import logging
	import yaml


	# Install global "ignore" filters for the two noisy warning categories
	for _ignored_category in (FutureWarning, DeprecationWarning):
	    warnings.filterwarnings("ignore", category=_ignored_category)

	def load_config(file_path):
	    """Read a YAML file and return the parsed configuration.

	    Failures are reported on stdout and through the root logger instead of
	    being raised, so callers must check the result for ``None``.

	    Args:
	        file_path (str): The path to the yaml file

	    Returns:
	        dict | None: Whatever ``yaml.safe_load`` produces for the file
	        (a dictionary for a mapping document), or ``None`` when the file
	        is missing or cannot be read/parsed.
	    """
	    # Look the logger up here rather than using the module-level ``logger``:
	    # this function runs before that name is assigned, so the error paths
	    # used to die with NameError and hide the real problem.
	    # ``getLogger().error`` (unlike ``logging.error``) does not implicitly
	    # call ``basicConfig``, so a later file-based setup still takes effect.
	    log = logging.getLogger()
	    try:
	        with open(file_path, 'r') as file:
	            return yaml.safe_load(file)
	    except FileNotFoundError as e:
	        print(f"File {file_path} not found, error: {e}")
	        log.error(f"File {file_path} not found, error: {e}")
	    except Exception as e:
	        # Deliberately broad: any read/parse failure is reported, not raised.
	        print(f"Error loading config file: {e}")
	        log.error(f"Error loading config file: {e}")
	    return None

	config = load_config('etc/config.yaml')
	if config is None:
	    # load_config reports failures and returns None; stop with a clear
	    # message instead of failing on the first subscript below.
	    raise RuntimeError("Could not load configuration from 'etc/config.yaml'")

	# NOTE(review): ``filename`` and the date bounds are read here, but the rest
	# of the script uses hard-coded values - confirm which source is intended.
	filename = config['data']['filename']
	log_filename = config['logging']['log_filename']
	# Keep the two bounds under separate names; both used to be assigned to
	# ``dates``, so the start date was immediately overwritten by the end date.
	start_date = config['dates']['start_date']
	end_date = config['dates']['end_date']
	dates = end_date  # retained: the value the old double assignment left behind


	#### logging ####
	logfilename = log_filename
	# Records are appended to the log file (basicConfig's default file mode).
	logging.basicConfig(filename=logfilename, format='%(asctime)s %(message)s')
	logger = logging.getLogger()
	logger.setLevel(logging.INFO)  # switch to logging.DEBUG for more detail

	# Time window of input rows used for prediction (both bounds inclusive)
	start_time = '2024-02-16'
	end_time = '2024-02-17'

	# JSON files holding the integer-code -> label mapping for each category
	location_json = 'location_mapping.json'
	customer_json = 'customer_mapping.json'
	cluster_json = 'cluster_mapping.json'
	project_json = 'project_mapping.json'

	# Load the data file
	df_grouped = pd.read_csv('ml_data_new_completed_startdate_1711450276.csv')

	# Convert 'timestamp' to datetime
	df_grouped['timestamp'] = pd.to_datetime(df_grouped['timestamp'])

	# Columns that are label-encoded and later joined into the series id
	category_columns = ['location', 'customer', 'cluster', 'project']

	# Convert object columns to category type
	for column in category_columns:
	    df_grouped[column] = df_grouped[column].astype('category')

	# Code -> label dictionaries derived from the categories found in this CSV.
	# NOTE(review): these are not used further down; decoding relies on the JSON
	# mapping files instead. The codes assigned here only agree with those files
	# if both were built from the same set of categories - confirm.
	location_mapping_dict = dict(enumerate(df_grouped['location'].cat.categories))
	customer_mapping_dict = dict(enumerate(df_grouped['customer'].cat.categories))
	cluster_mapping_dict = dict(enumerate(df_grouped['cluster'].cat.categories))
	project_mapping_dict = dict(enumerate(df_grouped['project'].cat.categories))

	# Replace each category with its integer code
	for column in category_columns:
	    df_grouped[column] = df_grouped[column].cat.codes

	# Keep only rows inside the window. ``.copy()`` makes the result an
	# independent frame, so adding ``id_col`` below is not a chained assignment
	# on a slice (which triggers SettingWithCopyWarning).
	# NOTE(review): ``end_time`` is compared as midnight, so rows after
	# 2024-02-17 00:00:00 are excluded - confirm that is the intended window.
	in_window = (df_grouped['timestamp'] >= start_time) & (df_grouped['timestamp'] <= end_time)
	df_grouped = df_grouped[in_window].copy()

	print(df_grouped.head(10))

	# Series id: the four encoded columns joined with '_'
	df_grouped['id_col'] = (
	    df_grouped['location'].astype(str) + '_'
	    + df_grouped['customer'].astype(str) + '_'
	    + df_grouped['cluster'].astype(str) + '_'
	    + df_grouped['project'].astype(str)
	)

	# Deserialize the model from disk
	model = load('test_train_1.joblib')

	# Predict 1440 steps ahead for every distinct series id in the input frame
	series_ids = df_grouped.id_col.unique().tolist()
	pred = model.predict(horizon=1440, dynamic_dfs=[df_grouped], ids=series_ids)

	# Break the composite id back into its four encoded component columns
	id_parts = pred['id_col'].str.split('_', expand=True)
	pred[['location', 'customer', 'cluster', 'project']] = id_parts
	print(pred.head(10))

	def _load_code_mapping(path):
	    """Return the ``{int code: label}`` mapping stored in the JSON file *path*.

	    JSON object keys are always strings, so they are converted back to the
	    integer codes. The ``with`` block closes the file as soon as it is read
	    (the previous bare ``open`` calls never closed their handles).
	    """
	    with open(path) as mapping_file:
	        return {int(k): v for k, v in json.load(mapping_file).items()}


	# Load the code -> label mapping for each category column
	location_mapping_json = _load_code_mapping(location_json)
	customer_mapping_json = _load_code_mapping(customer_json)
	cluster_mapping_json = _load_code_mapping(cluster_json)
	project_mapping_json = _load_code_mapping(project_json)

	# The split id parts are strings: cast them to integer codes, then map each
	# code back to its original category label
	pred['location'] = pred['location'].astype(int).map(location_mapping_json)
	pred['customer'] = pred['customer'].astype(int).map(customer_mapping_json)
	pred['cluster'] = pred['cluster'].astype(int).map(cluster_mapping_json)
	pred['project'] = pred['project'].astype(int).map(project_mapping_json)

	print("pred", pred.head(10))
	pred.to_csv('prediction_from_model_3_1.csv', index=False)