Shantanu Oak shantanuo

## model.py
# Two-step pipeline: a "dct" DictVectorizer step feeding an "svc" LinearSVC step.
# NOTE(review): the imports are not shown in this snippet — presumably these
# are the scikit-learn classes; confirm.
clf = Pipeline([("dct", DictVectorizer()), ("svc", LinearSVC())])
# Search grid: the "svc__C" key addresses the C parameter of the "svc" step.
# Candidates run from 1e15 down to 1e-5 in factors of 100.
params = {
    "svc__C": [1e15, 1e13, 1e11, 1e9, 1e7, 1e5, 1e3, 1e1, 1e-1, 1e-3, 1e-5]
}
# Grid search over `params` with cv=10; n_jobs=-1 and verbose=2 are passed
# through to GridSearchCV (parallelism and progress output respectively).
gs = GridSearchCV(clf, params, cv=10, verbose=2, n_jobs=-1)
# X and y are defined outside this snippet.
gs.fit(X, y)
# Keep the estimator that the search selected as best.
model = gs.best_estimator_

## extract_repo.py
# download links from DynamoDB
!aws dynamodb scan --table-name Movies --query "Items[*].[id.S,title.S]" --output json | sort -u > /tmp/download.txt

# copy github links and extract repo URLs
import pandas as pd
mylist = """
        "https://github.com/apoorvnandan/speech-recognition-primer"
        "https://github.com/asmitakulkarni/QuoteGenerator"
        "https://github.com/cjhutto/vaderSentiment"
        "https://github.com/docker/docker-bench-security"

## multi.py
import multiprocessing
import os
import requests

class MultiProcDownloader(object):
    def __init__(self, urls):
        self.urls = urls

    def run(self):
        jobs=[]

## Regression.py
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from keras.utils import to_categorical

# Load the news-category dataset into a DataFrame; lines=True makes pandas
# read the file as one JSON object per line instead of a single JSON document.
df = pd.read_json("../data/news_category_dataset.json", lines=True)

## post_response

Response Headers
{"X-Amzn-Trace-Id":"Root=1-5d3fda3a-13e3be3a90a70be98b3be772;Sampled=0","Content-Type":"application/json"}


Logs
Execution log for request afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
Tue Jul 30 05:48:42 UTC 2019 : Starting execution for request: afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
Tue Jul 30 05:48:42 UTC 2019 : HTTP Method: POST, Resource Path: /

## mongo-ls.js
// Usage: mongo {Server without mongodb:// example 127.0.0.1:27017}/{DbName} [-u {Username}] [-p {Password}] < ./mongo-ls.js

var collections = db.getCollectionNames();

print('Collections inside the db:');
for(var i = 0; i < collections.length; i++){
  var name = collections[i];

  if(name.substr(0, 6) != 'system')
    print(name + ' - ' + db[name].count() + ' records');

## vgg_like_charcnn.py
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.contrib.keras.api.keras.losses import binary_crossentropy
from collections import Counter

## npm.log
# output of npm start command...
# npm start

> amplify-js-app@1.0.0 start /amplify-js-app
> webpack && webpack-dev-server --mode development

Hash: 7ae7e983cf728aac0aca
Version: webpack 4.29.6
Time: 107ms
Built at: 03/14/2019 7:52:13 AM

## aws_bill.py
import boto3
import datetime
import pandas as pd
import numpy as np

# Current UTC time. datetime.utcnow() is deprecated since Python 3.12, so take
# an aware UTC timestamp and strip tzinfo: `now` stays a naive datetime exactly
# as before, which keeps any later arithmetic with naive datetimes working.
now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
# Reporting window as ISO dates (YYYY-MM-DD): from 40 days ago up to today.
start = (now - datetime.timedelta(days=40)).strftime("%Y-%m-%d")
end = now.strftime("%Y-%m-%d")

cd = boto3.client("ce",

## tf_learn.py
# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
import pandas as pd
import numpy as np
import re

import tensorflow_hub as hub
import tensorflow as tf
import keras
from tensorflow.python.keras.layers import Input, Dense, Lambda
	clf = Pipeline([("dct", DictVectorizer()), ("svc", LinearSVC())])
	params = {
	"svc__C": [1e15, 1e13, 1e11, 1e9, 1e7, 1e5, 1e3, 1e1, 1e-1, 1e-3, 1e-5]
	}
	gs = GridSearchCV(clf, params, cv=10, verbose=2, n_jobs=-1)
	gs.fit(X, y)
	model = gs.best_estimator_
	# download links from DynamoDB
	!aws dynamodb scan --table-name Movies --query "Items[*].[id.S,title.S]" --output json \| sort -u > /tmp/download.txt

	# copy github links and extract repo URLs
	import pandas as pd
	mylist = """
	"https://github.com/apoorvnandan/speech-recognition-primer"
	"https://github.com/asmitakulkarni/QuoteGenerator"
	"https://github.com/cjhutto/vaderSentiment"
	"https://github.com/docker/docker-bench-security"
	import multiprocessing
	import os
	import requests

	class MultiProcDownloader(object):
	def __init__(self, urls):
	self.urls = urls

	def run(self):
	jobs=[]
	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import LabelEncoder
	from sklearn.metrics import label_ranking_average_precision_score
	from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LogisticRegression
	from keras.utils import to_categorical

	df = pd.read_json("../data/news_category_dataset.json", lines=True)

	Response Headers
	{"X-Amzn-Trace-Id":"Root=1-5d3fda3a-13e3be3a90a70be98b3be772;Sampled=0","Content-Type":"application/json"}



	Logs
	Execution log for request afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
	Tue Jul 30 05:48:42 UTC 2019 : Starting execution for request: afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
	Tue Jul 30 05:48:42 UTC 2019 : HTTP Method: POST, Resource Path: /
	// Usage: mongo {Server without mongodb:// example 127.0.0.1:27017}/{DbName} [-u {Username}] [-p {Password}] < ./mongo-ls.js

	var collections = db.getCollectionNames();

	print('Collections inside the db:');
	for(var i = 0; i < collections.length; i++){
	var name = collections[i];

	if(name.substr(0, 6) != 'system')
	print(name + ' - ' + db[name].count() + ' records');
	# This Python 3 environment comes with many helpful analytics libraries installed
	# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
	# For example, here's several helpful packages to load in

	import numpy as np # linear algebra
	import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
	import tensorflow as tf
	from tensorflow.contrib.keras.api.keras.losses import binary_crossentropy
	from collections import Counter
	# output of npm start command...
	# npm start

	> amplify-js-app@1.0.0 start /amplify-js-app
	> webpack && webpack-dev-server --mode development

	Hash: 7ae7e983cf728aac0aca
	Version: webpack 4.29.6
	Time: 107ms
	Built at: 03/14/2019 7:52:13 AM
	import boto3
	import datetime
	import pandas as pd
	import numpy as np

	now = datetime.datetime.utcnow()
	start = (now - datetime.timedelta(days=40)).strftime("%Y-%m-%d")
	end = now.strftime("%Y-%m-%d")

	cd = boto3.client("ce",
	# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
	import pandas as pd
	import numpy as np
	import re

	import tensorflow_hub as hub
	import tensorflow as tf
	import keras
	from tensorflow.python.keras.layers import Input, Dense, Lambda