Skip to content

Instantly share code, notes, and snippets.

@codesankalp
Created October 17, 2022 19:35
Show Gist options
  • Save codesankalp/0af4d7530f00c730bd42130884b4ed22 to your computer and use it in GitHub Desktop.
Save codesankalp/0af4d7530f00c730bd42130884b4ed22 to your computer and use it in GitHub Desktop.
import argparse
import importlib
import json
import math
import os
import subprocess
import sys
from importlib import import_module
from typing import Dict, Final, List, Optional
# Keys looked up in each entry of the "columns" list of metric.json.
NAME: Final = "name"
EXPECTED_COLUMN: Final = "expected_mapped_column"
SUBMISSION_COLUMN: Final = "submission_mapped_column"

# Canonical column the CSV data is sorted by and merged on.
ID: Final = "id"

# Importable module name -> pinned pip requirement used to install it.
THIRD_PARTY_PACKAGES: Dict[str, str] = dict(
    pandas="pandas==1.4.2",
    sklearn="scikit-learn==1.1.1",
)
# Make sure the third-party dependencies are importable; if any of them is
# missing, install the pinned versions with pip using the current interpreter.
try:
    for package in THIRD_PARTY_PACKAGES:
        import_module(package)
except ModuleNotFoundError:
    subprocess.check_call(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            *THIRD_PARTY_PACKAGES.values(),
        ]
    )
    # Packages installed while the interpreter is running may be hidden by
    # the import system's cached directory listings.
    importlib.invalidate_caches()

# Imported after the bootstrap rather than in a ``finally`` clause, so that a
# failed ``pip install`` surfaces as its own error instead of being replaced
# by a secondary ModuleNotFoundError.
import pandas as pd  # noqa: E402
from sklearn.metrics import mean_squared_log_error  # noqa: E402
class InvalidSubmissionFile(Exception):
    """Signals that a submission file could not be scored."""
def get_args(
    description: str, argv: Optional[List[str]] = None
) -> argparse.Namespace:
    """Parse the command-line options of the scoring script.

    Args:
        description: Text shown at the top of the ``--help`` output.
        argv: Argument strings to parse instead of the process command
            line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what existing callers get.

    Returns:
        Namespace with the ``metric``, ``expected`` and ``submission`` paths.
        ``metric`` defaults to ``metric.json`` next to this script;
        ``expected`` and ``submission`` are required.
    """
    parser = argparse.ArgumentParser(description=description)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    parser.add_argument(
        "--metric",
        type=str,
        default=os.path.join(script_dir, "metric.json"),
        help="Path to metric.json",
    )
    parser.add_argument(
        "--expected",
        type=str,
        help="Path to expected csv file",
        required=True,
    )
    parser.add_argument(
        "--submission",
        type=str,
        help="Path to submission csv file",
        required=True,
    )
    return parser.parse_args(argv)
def get_metric(metric_file_path: str) -> Dict:
    """Load the metric configuration from a JSON file.

    Args:
        metric_file_path: Path to the metric JSON file.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle even when parsing fails.
    with open(metric_file_path, "r", encoding="utf-8") as metric_json:
        return json.load(metric_json)
def get_column_mapping(columns: List[Dict], column_name: str) -> Dict:
    """Build a lookup from lower-cased column name to a mapped column.

    Args:
        columns: Column descriptions; each entry is read with ``.get`` for
            the ``NAME`` key and for ``column_name``.
        column_name: Key whose value becomes the mapping's value for each
            entry (e.g. ``EXPECTED_COLUMN`` or ``SUBMISSION_COLUMN``).

    Returns:
        Dict of ``entry[NAME].lower()`` to ``entry.get(column_name)``; the
        value is ``None`` for an entry that lacks ``column_name``.

    Raises:
        AttributeError: If an entry has no ``NAME`` value to lower-case.
    """
    return {
        column.get(NAME).lower(): column.get(column_name)
        for column in columns
    }
def get_csv_data(file_path: str, mapping: Dict) -> pd.DataFrame:
    """Read a CSV file, rename its columns and order the rows by ``ID``.

    Args:
        file_path: Path of the CSV file to read.
        mapping: Target column name -> column header as it appears in the
            file; the inverse of this mapping is applied as the rename.

    Returns:
        The renamed frame, sorted by the ``ID`` column.
    """
    header_to_target = dict(zip(mapping.values(), mapping.keys()))
    frame = pd.read_csv(file_path)
    renamed = frame.rename(columns=header_to_target)
    return renamed.sort_values(by=ID)
def get_score(
    expected_file_path: str, submission_file_path: str, metric: Dict
) -> float:
    """Compute the root mean squared logarithmic error of a submission.

    Args:
        expected_file_path: Path to the CSV file holding the expected values.
        submission_file_path: Path to the CSV file holding the predictions.
        metric: Parsed metric configuration; its ``"columns"`` list supplies
            the column mappings for both files.

    Returns:
        The square root of the mean squared logarithmic error between the
        expected and the submitted values.

    Raises:
        InvalidSubmissionFile: If scikit-learn rejects the merged data with
            a ``ValueError``.
    """
    columns = metric.get("columns", [])
    expected_df = get_csv_data(
        expected_file_path,
        get_column_mapping(columns, EXPECTED_COLUMN),
    )
    submission_df = get_csv_data(
        submission_file_path,
        get_column_mapping(columns, SUBMISSION_COLUMN),
    )
    # Left join keeps every expected row; ids that are absent from the
    # submission end up with missing predictions.
    merged_df = pd.merge(expected_df, submission_df, on=ID, how="left")
    try:
        # "expected_x"/"expected_y" are pandas' default merge suffixes for a
        # column called "expected" that exists in both frames.
        # NOTE(review): this relies on the metric config naming the target
        # column "expected" - confirm against metric.json.
        msle = mean_squared_log_error(
            y_true=merged_df["expected_x"],
            y_pred=merged_df["expected_y"],
        )
    except ValueError as err:
        print(str(err))
        # if there is no data or partial data in the submission file
        # it means the submission file is not valid
        raise InvalidSubmissionFile("Invalid submission file") from err
    # Take the root explicitly instead of passing ``squared=False``, which
    # newer scikit-learn releases deprecate and then drop.
    return math.sqrt(msle)
if __name__ == "__main__":
    # Script entry point: score the submission and always print both result
    # lines, even when scoring fails.
    score = 0
    # Start from the worst FS_SCORE. Deriving it from the default ``score``
    # of 0 would report 100% (a perfect result) whenever scoring failed.
    fs_score = 0
    try:
        args = get_args(description="Root Mean Squared Logarithmic Error")
        metric = get_metric(args.metric)
        score = get_score(args.expected, args.submission, metric)
        fs_score = (1 - score) * 100
    except Exception as err:
        # Top-level boundary: report the problem and fall through to the
        # result lines below.
        print(str(err))
    finally:
        print(f"Metric Score: {score}")
        print(f"FS_SCORE: {fs_score}%")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment