This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import inspect | |
import tempfile | |
from pathlib import Path | |
from typing import Optional, Dict, Any, Tuple | |
import mlflow.sklearn | |
from hyperopt import STATUS_OK, tpe, fmin, Trials | |
from mlflow.models.signature import infer_signature | |
from pyspark.cloudpickle import dump | |
from sklearn.metrics import cohen_kappa_score, roc_auc_score, f1_score |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Provider: | |
@staticmethod | |
def get_search_space() -> SearchSpace: | |
search_space = { | |
"classifier": { | |
"max_depth": hp.choice("max_depth", np.arange(3, 10, dtype=int)), | |
"n_estimators": hp.choice( | |
"n_estimators", np.arange(10, 100, dtype=int) | |
), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Provider: | |
@classmethod | |
def get_data( | |
cls, | |
data: pd.DataFrame, | |
source_metadata: SourceMetadata, | |
logger: Optional[Any] = None, | |
limit: Optional[int] = None, | |
) -> ModelData: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class ModelBuilderTask(Task): | |
def _read_data(self) -> Tuple[pd.DataFrame, SourceMetadata]: | |
db = self.conf["input"]["database"] | |
table_name = self.conf["input"]["table"] | |
full_table_name = f"{db}.{table_name}" | |
table = DeltaTable.forName(self.spark, full_table_name) | |
last_version = table.history(limit=1).toPandas()["version"][0] | |
_data = self.spark.sql( | |
f"select * from {full_table_name} VERSION AS OF {last_version}" | |
).toPandas() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Dict, Any | |
import pandas as pd | |
from pydantic import BaseModel | |
class FlexibleBaseModel(BaseModel):
    """Pydantic base model that permits fields of arbitrary types.

    Subclasses inherit the nested ``Config`` and can therefore declare
    fields whose types pydantic has no built-in validator for.
    """

    class Config:
        # Without this flag pydantic rejects field types it does not know
        # how to validate when the model class is created.
        arbitrary_types_allowed = True
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DatasetLoaderTask(Task): | |
def get_data(self, limit: Optional[int] = None) -> PandasDataFrame: | |
self.logger.info("Loading the dataset") | |
dataset = openml.datasets.get_dataset( | |
"CreditCardFraudDetection", download_qualities=False | |
) | |
X, y, _, _ = dataset.get_data(dataset_format="dataframe") | |
_df = X | |
# sanitize column names | |
_df.rename(columns={"Class": "TARGET"}, inplace=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Type alias for a two-level mapping: an outer string key (e.g. a pipeline
# component name) to a dict of that component's parameter names and values.
SearchSpace = Dict[str, Dict[str, Any]]
class Provider: | |
@staticmethod | |
def get_pipeline(params: Optional[SearchSpace] = None) -> Pipeline: | |
if not params: | |
params = {} | |
pipeline = Pipeline( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DatasetLoaderTask(Task): | |
def get_data(self, limit: Optional[int] = None) -> SparkDataFrame: | |
self.logger.info("Loading the dataset") | |
dataset = openml.datasets.get_dataset("CreditCardFraudDetection") | |
X, y, _, _ = dataset.get_data(dataset_format="dataframe") | |
_df = X | |
# sanitize column names | |
_df.rename(columns={"Class": "TARGET"}, inplace=True) | |
_df.columns = [c.lower() for c in _df] | |
_df.drop(columns=["time"], inplace=True) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.renarde.demos.apps | |
import com.typesafe.config.{Config, ConfigFactory} | |
import net.renarde.demos.common.ConfigProvider | |
import net.renarde.demos.utils.{KafkaSupport, SparkSupport} | |
import org.scalatest.funsuite.AnyFunSuite | |
import collection.JavaConverters._ | |
class AppTests extends AnyFunSuite with KafkaSupport with SparkSupport{ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.renarde.demos.apps | |
import net.renarde.demos.common.GenericApp | |
/** Application object named "reader".
  *
  * Its body runs when the object is initialised and only logs three
  * messages: the application name, the current config, and a completion
  * notice. `log` and `config` are not defined in this object; presumably
  * they are provided by [[GenericApp]] -- confirm against that trait.
  */
object ReaderApp extends GenericApp {
  val appName = "reader"

  log.info(s"Running the application $appName")
  log.info(s"Current application config: $config")
  log.info("Application finished")
}