Skip to content

Instantly share code, notes, and snippets.

View michael021997's full-sized avatar

michael021997

View GitHub Profile
import pandas as pd
import os
import glob
from datetime import datetime, timedelta
import concurrent.futures
import numpy as np
from calendar import monthrange
def convert_to_monthly(df):
"""
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt
from prophet_model import *
# Feature tracking dictionary to store information about each feature
feature_tracking = {}
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None) # Show all columns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.ensemble import RandomForestRegressor
@michael021997
michael021997 / gist:fd9590e327d80e1f7e4723fef02ee01b
Last active April 1, 2025 17:37
enhanced sklearn classification
"""
Enhanced Classification Framework
A flexible framework for classification tasks with feature selection,
class imbalance handling, hyperparameter tuning, threshold optimization, and visualization.
Combines elements from both scikit-learn and PySpark frameworks.
"""
import pandas as pd
import numpy as np
# Enhanced PySpark Classification Framework
# This framework combines PySpark's distributed processing capabilities with
# scikit-learn's visualization and evaluation metrics from the General Classification Framework
import optuna
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, count, lit, variance, corr, row_number, udf
from pyspark.sql.types import FloatType
from pyspark.sql import Window
from pyspark.sql import functions as F
@michael021997
michael021997 / gist:c5d50b011e7402236952a920a7fd306e
Last active April 1, 2025 14:10
Classification Framework using PySpark
"""
General Classification Framework for PySpark
A flexible framework for classification tasks with feature importance analysis,
hyperparameter tuning using Optuna, threshold optimization, and visualization.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
@michael021997
michael021997 / gist:5ead0bfdf2ce2be78d24d2b07c6bdcaf
Last active March 21, 2025 21:46
Classification Framework using Pandas
"""
General Classification Framework
A flexible framework for classification tasks with feature importance analysis,
hyperparameter tuning, threshold optimization, and visualization.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt