Skip to content

Instantly share code, notes, and snippets.

View gautamdudeja90's full-sized avatar

Gautam Dudeja gautamdudeja90

View GitHub Profile
package com.apple.aml.stargate.pipeline.parser;
import com.apple.aml.stargate.beam.sdk.metrics.HubbleMetricsSink;
import com.apple.aml.stargate.beam.sdk.options.StargateOptions;
import com.apple.aml.stargate.beam.sdk.ts.ErrorInterceptor;
import com.apple.aml.stargate.beam.sdk.utils.ErrorPayloadConverterFns;
import com.apple.aml.stargate.beam.sdk.values.SCollection;
import com.apple.aml.stargate.common.constants.A3Constants;
import com.apple.aml.stargate.common.constants.CommonConstants;
import com.apple.aml.stargate.common.constants.PipelineConstants;
@gautamdudeja90
gautamdudeja90 / GradleCheatsheet.md
Created February 26, 2023 22:09 — forked from jiffle/GradleCheatsheet.md
Cheatsheet of Gradle Commands and Config

Command Cheatsheet

  • Convert Maven build to Gradle

    gradle init

  • Initialise new project folder structure (Java example)

    gradle init --type java-library

# update-dynamo-autoscale-settings
import os
import sys
import boto3
from botocore.exceptions import ClientError
# Add path for additional imports
# NOTE(review): this append runs after boto3 and botocore have already been
# imported above, so it can only influence imports that execute later in the
# file. The path is relative, so it resolves against the process's current
# working directory — confirm that matches the deployment layout.
sys.path.append('./lib/python3.7/site-packages')
# Initialize boto3 clients
# `dynamodb` is a boto3 service *resource* for DynamoDB, while
# `dynamodb_scaling` is a low-level *client* for the
# 'application-autoscaling' service. Both are created once at import time
# with no explicit region or credentials passed here.
dynamodb = boto3.resource('dynamodb')
dynamodb_scaling = boto3.client('application-autoscaling')
def anova_machine(Cat_col, target_col, df):
"""ANOVA function. Provide the target variable column y, the main data set and a categorical column.
A pivot table will be produced. Then an ANOVA performed to see if the columns are significantly different from each other.
Currently set for 95% confidence, will update later for higher significance setting."""
p_table = df.pivot(columns=Cat_col, values=target_col)
total_columns = len(p_table.columns)
total_rows = len(p_table)
# ------------------------------------------------------------------------------
# Importing required libraries
# ------------------------------------------------------------------------------
from pyspark.sql.types import Row
# Importing required libraries for VIF Calculation
from pyspark.ml.regression import LinearRegression
from pyspark.ml.linalg import DenseVector
from pyspark.ml.linalg import Vectors
from pyspark.ml.evaluation import RegressionEvaluator
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Fit an isolation forest with 100 estimators on the 'Sales' column alone.
# The values are reshaped to a single-feature 2-D array (one row per sample).
# NOTE(review): `IsolationForest`, `df`, `np` and `plt` are not defined in this
# fragment — presumably scikit-learn, a pandas DataFrame, numpy and
# matplotlib.pyplot; confirm against the full script.
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(df['Sales'].values.reshape(-1, 1))
# Evenly spaced evaluation grid spanning the observed Sales min..max, with as
# many points as df has rows, shaped as a column vector to match the fit input.
xx = np.linspace(df['Sales'].min(), df['Sales'].max(), len(df)).reshape(-1,1)
# Evaluate the fitted model on the grid: a continuous score per grid point and
# a label per grid point (the plot below treats a label of -1 as an outlier).
anomaly_score = isolation_forest.decision_function(xx)
outlier = isolation_forest.predict(xx)
# Draw the score curve over the Sales range.
plt.figure(figsize=(10,4))
plt.plot(xx, anomaly_score, label='anomaly score')
# Shade, across the full vertical extent of the score curve, the x-ranges whose
# grid points were labelled -1. `xx.T[0]` flattens the column vector back to 1-D.
plt.fill_between(xx.T[0], np.min(anomaly_score), np.max(anomaly_score),
where=outlier==-1, color='r',
alpha=.4, label='outlier region')