View collab_filter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pyspark.mllib.recommendation import ALS, Rating | |
from pyspark.sql import SparkSession, SQLContext | |
from sklearn.metrics.pairwise import cosine_similarity | |
if __name__ == "__main__": # run this by typing "python collaborative_filter.py" | |
app_name = "collab_filter_example" | |
# create a Spark context | |
spark = SparkSession.builder.master("local").appName(app_name).getOrCreate() |
View saving_mlflow_model.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.linear_model import ElasticNet | |
# these are internal wrapper/utility classes that we have developed to streamline the ML lifecycle process | |
from hs_mllib.model_lifecycle.packaging import MLModel, ScikitLearnModel | |
# this MLflow context manager allows experiment runs (parameters and metrics) to be tracked and easily queried | |
with MLModel.mlflow.start_run() as run: | |
# data transformations and feature pre-processing code omitted (boiler-plate code) | |
... |
View tempfile.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tempfile import NamedTemporaryFile | |
import os | |
def test():
    """Yield the path of a freshly created named temporary file.

    The file is managed by a ``with`` block, so it is closed (and, with
    ``NamedTemporaryFile``'s default ``delete=True``, removed) as soon as the
    generator is resumed past the ``yield`` or is closed, instead of whenever
    the file object happens to be garbage-collected.

    Yields:
        str: The filesystem path of the temporary file.
    """
    with NamedTemporaryFile() as tf:
        yield tf.name
if __name__ == "__main__": |
View start_query.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a custom processor class that we create to handle real-time inference;
# we'll show the skeleton code for it below.
processor = RealTimeInferenceProcessor()

# The chain is wrapped in parentheses rather than joined with backslashes:
# a comment after a trailing backslash is a syntax error, whereas comments
# are allowed between the lines of a parenthesised expression.
query = (
    df.writeStream
    # configure checkpointing in case of job failure
    .option("checkpointLocation", "dbfs://pathToYourCheckpoint")
    # for each micro-batch, apply this method
    .foreachBatch(processor.process_batch)
    .outputMode("append")
    # start the stream query
    .start()
)
View process_micro_batch_pseudocode.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from abc import ABC, abstractmethod | |
from pyspark.sql.dataframe import DataFrame as SparkFrame | |
class Processor(ABC):
    """Abstract interface for objects that handle one batch at a time."""

    @abstractmethod
    def process_batch(self, df: SparkFrame, epochID: int) -> None:
        """Handle a single batch.

        Args:
            df: The Spark DataFrame holding the batch.
            epochID: Identifier of the batch. Annotated as ``int`` because
                Structured Streaming's ``foreachBatch`` hands its callback an
                integer batch id.

        Raises:
            NotImplementedError: Always; concrete subclasses must override
                this method.
        """
        raise NotImplementedError
class RealTimeInferenceProcessor(Processor): |
View structured_streaming_kinesis_watermark.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.functions import window | |
# configure reading from the stream
# The chain is parenthesised so it can legally span several lines, and every
# `.option(...)` call is closed before the next one begins.
kinesis_df = (
    spark.readStream.format("kinesis")
    .option("streamName", KINESIS_STREAM_NAME)
    .option("region", AWS_REGION)
    .option("roleArn", KINESIS_ACCESS_ROLE_ARN)
    .option("initialPosition", "latest")
    .load()
)
View main.go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"log" | |
"os" | |
"os/user" | |
"path/filepath" | |
) |
View data_descriptor_behavior.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Walk through reading `name` on a Student instance before and after the
# attribute is assigned, printing the instance's own __dict__ in between.
student = Student()
print(f"\nFirst access of name attribute on student: {student.name}")

# Call __setattr__ explicitly; this is the hook `student.name = ...` goes through.
student.__setattr__("name", "Yu Chen")

# Show what the instance itself holds after the assignment.
print(f"\n{student} instance symbol table: {student.__dict__}")
print(f"\nSecond access of name attribute on student: {student.name}")
View custom_find_all.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Read every line of the SSH log up front; the `with` block closes the file
# handle as soon as the lines are in memory.
with open("SSH_2k.log.txt") as file:
    logs = file.readlines()  # returns a list of strings

# regex to parse out event messages from process ID 24200 or 24206
find_events = r'sshd\[(24200|24206)\]: (.+)'
def find_all(expression, strings):
    """Return the captured groups of the first match in each matching string.

    Args:
        expression: A regular expression (pattern string or compiled pattern).
        strings: An iterable of strings to search.

    Returns:
        A list with one tuple of captured groups per string in which
        ``expression`` is found, in input order. Strings without a match
        contribute nothing.
    """
    # Compile once and search each string once; reuse the match object for
    # both the filter and the result.
    pattern = re.compile(expression)
    matches = (pattern.search(string) for string in strings)
    return [match.groups() for match in matches if match]
View cached_property.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class cached_property: | |
''' | |
Decorator that converts a method with a single self argument into a | |
property cached on the instance. | |
A cached property can be made out of an existing method: | |
(e.g. ``url = cached_property(get_absolute_url)``). | |
''' | |
# ... | |
def __get__(self, instance, cls=None): |
Newer Older