Gautam Dudeja gautamdudeja90

## pipeline.java
package com.apple.aml.stargate.pipeline.parser;

import com.apple.aml.stargate.beam.sdk.metrics.HubbleMetricsSink;
import com.apple.aml.stargate.beam.sdk.options.StargateOptions;
import com.apple.aml.stargate.beam.sdk.ts.ErrorInterceptor;
import com.apple.aml.stargate.beam.sdk.utils.ErrorPayloadConverterFns;
import com.apple.aml.stargate.beam.sdk.values.SCollection;
import com.apple.aml.stargate.common.constants.A3Constants;
import com.apple.aml.stargate.common.constants.CommonConstants;
import com.apple.aml.stargate.common.constants.PipelineConstants;

## GradleCheatsheet.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                gautamdudeja90
                / GradleCheatsheet.md
            
            
              Created
              February 26, 2023 22:09
                — forked from jiffle/GradleCheatsheet.md
            
              
                Cheatsheet of Gradle Commands and Config
              
          
    Command Cheatsheet


Convert a Maven build to Gradle
gradle init


Initialise new project folder structure (Java example)
gradle init --type java-library


## autoscale.py
# update-dynamo-autoscale-settings
import os
import sys
import boto3
from botocore.exceptions import ClientError
# Add path for additional imports
# NOTE(review): this append runs after the boto3/botocore imports above, so it
# can only affect imports executed later - confirm that ordering is intended.
sys.path.append('./lib/python3.7/site-packages')
# Initialize boto3 clients
# `dynamodb` is a boto3 resource handle for the 'dynamodb' service;
# `dynamodb_scaling` is a boto3 client for the 'application-autoscaling' service.
dynamodb = boto3.resource('dynamodb')
dynamodb_scaling = boto3.client('application-autoscaling')

## anova_machine.py
def anova_machine(Cat_col, target_col, df):
    """ANOVA function.  Provide the target variable column y, the main data set and a categorical column.
    A pivot table will be produced. Then an ANOVA performed to see if the columns are significantly different from each other.
    Currently set for 95% confidence, will update later for higher significance setting."""

    p_table = df.pivot(columns=Cat_col, values=target_col)

    total_columns = len(p_table.columns)

    total_rows = len(p_table)

## vif_multicollinearity.py
# ------------------------------------------------------------------------------
# Importing required libraries
# ------------------------------------------------------------------------------
from pyspark.sql.types import Row

# Importing required libraries for VIF Calculation
from pyspark.ml.regression import LinearRegression
from pyspark.ml.linalg import DenseVector
from pyspark.ml.linalg import Vectors
from pyspark.ml.evaluation import RegressionEvaluator

## outlier-detection.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                gautamdudeja90
                / outlier-detection.ipynb
            
            
              Created
              July 20, 2020 05:27
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## outlier_isolation.py
# Fit an isolation forest with 100 estimators on the 'Sales' column, reshaped
# into a single-feature 2-D array (one row per value).
# NOTE(review): IsolationForest, np, plt and df are defined outside this excerpt -
# presumably scikit-learn, NumPy, matplotlib.pyplot and a pandas DataFrame; confirm.
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(df['Sales'].values.reshape(-1, 1))
# Evenly spaced evaluation grid spanning the observed Sales range, with as many
# points as df has rows, shaped as a column vector to match the fit input.
xx = np.linspace(df['Sales'].min(), df['Sales'].max(), len(df)).reshape(-1,1)
# Score and label every grid point; the plot below treats a label of -1 as an outlier.
anomaly_score = isolation_forest.decision_function(xx)
outlier = isolation_forest.predict(xx)
# Plot the score curve, then shade in translucent red the grid ranges labelled -1.
plt.figure(figsize=(10,4))
plt.plot(xx, anomaly_score, label='anomaly score')
# xx.T[0] turns the (n, 1) column vector back into a 1-D array for the x-axis;
# the shaded band spans the full min-to-max range of the anomaly score.
plt.fill_between(xx.T[0], np.min(anomaly_score), np.max(anomaly_score),
                 where=outlier==-1, color='r',
                 alpha=.4, label='outlier region')
	package com.apple.aml.stargate.pipeline.parser;

	import com.apple.aml.stargate.beam.sdk.metrics.HubbleMetricsSink;
	import com.apple.aml.stargate.beam.sdk.options.StargateOptions;
	import com.apple.aml.stargate.beam.sdk.ts.ErrorInterceptor;
	import com.apple.aml.stargate.beam.sdk.utils.ErrorPayloadConverterFns;
	import com.apple.aml.stargate.beam.sdk.values.SCollection;
	import com.apple.aml.stargate.common.constants.A3Constants;
	import com.apple.aml.stargate.common.constants.CommonConstants;
	import com.apple.aml.stargate.common.constants.PipelineConstants;
	# update-dynamo-autoscale-settings
	import os
	import sys
	import boto3
	from botocore.exceptions import ClientError
	# Add path for additional imports
	sys.path.append('./lib/python3.7/site-packages')
	# Initialize boto3 clients
	dynamodb = boto3.resource('dynamodb')
	dynamodb_scaling = boto3.client('application-autoscaling')
	def anova_machine(Cat_col, target_col, df):
	"""ANOVA function. Provide the target variable column y, the main data set and a categorical column.
	A pivot table will be produced. Then an ANOVA performed to see if the columns are significantly different from each other.
	Currently set for 95% confidence, will update later for higher significance setting."""

	p_table = df.pivot(columns=Cat_col, values=target_col)

	total_columns = len(p_table.columns)

	total_rows = len(p_table)
	# ------------------------------------------------------------------------------
	# Importing required libraries
	# ------------------------------------------------------------------------------
	from pyspark.sql.types import Row

	# Importing required libraries for VIF Calculation
	from pyspark.ml.regression import LinearRegression
	from pyspark.ml.linalg import DenseVector
	from pyspark.ml.linalg import Vectors
	from pyspark.ml.evaluation import RegressionEvaluator
	isolation_forest = IsolationForest(n_estimators=100)
	isolation_forest.fit(df['Sales'].values.reshape(-1, 1))
	xx = np.linspace(df['Sales'].min(), df['Sales'].max(), len(df)).reshape(-1,1)
	anomaly_score = isolation_forest.decision_function(xx)
	outlier = isolation_forest.predict(xx)
	plt.figure(figsize=(10,4))
	plt.plot(xx, anomaly_score, label='anomaly score')
	plt.fill_between(xx.T[0], np.min(anomaly_score), np.max(anomaly_score),
	where=outlier==-1, color='r',
	alpha=.4, label='outlier region')