Skip to content

Instantly share code, notes, and snippets.

# Two-step pipeline: dict features are vectorized ("dct") and then passed to
# a linear SVM ("svc").
clf = Pipeline(
    steps=[
        ("dct", DictVectorizer()),
        ("svc", LinearSVC()),
    ]
)

# Candidate values for the "svc" step's C parameter, from 1e15 down to 1e-5
# in steps of two decades.
params = dict(
    svc__C=[1e15, 1e13, 1e11, 1e9, 1e7, 1e5, 1e3, 1e1, 1e-1, 1e-3, 1e-5],
)

# Grid search over the candidates with cv=10; X and y must already be defined
# by the surrounding code.
gs = GridSearchCV(
    estimator=clf,
    param_grid=params,
    cv=10,
    verbose=2,
    n_jobs=-1,
)
gs.fit(X, y)

# Keep the best pipeline found by the search.
model = gs.best_estimator_
@shantanuo
shantanuo / extract_repo.py
Created September 6, 2019 09:07
link extractor project script
# download links from DynamoDB
!aws dynamodb scan --table-name Movies --query "Items[*].[id.S,title.S]" --output json | sort -u > /tmp/download.txt
# copy github links and extract repo URLs
import pandas as pd
mylist = """
"https://github.com/apoorvnandan/speech-recognition-primer"
"https://github.com/asmitakulkarni/QuoteGenerator"
"https://github.com/cjhutto/vaderSentiment"
"https://github.com/docker/docker-bench-security"
@shantanuo
shantanuo / multi.py
Created September 2, 2019 04:00
file to download 2 files at the same time
import multiprocessing
import os
import requests
class MultiProcDownloader(object):
def __init__(self, urls):
self.urls = urls
def run(self):
jobs=[]
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import label_ranking_average_precision_score
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from keras.utils import to_categorical
df = pd.read_json("../data/news_category_dataset.json", lines=True)
@shantanuo
shantanuo / post_response
Created July 30, 2019 05:50
api gateway test response
Response Headers
{"X-Amzn-Trace-Id":"Root=1-5d3fda3a-13e3be3a90a70be98b3be772;Sampled=0","Content-Type":"application/json"}
Logs
Execution log for request afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
Tue Jul 30 05:48:42 UTC 2019 : Starting execution for request: afd9f3f2-b28d-11e9-bb2a-4bb7e1da2e13
Tue Jul 30 05:48:42 UTC 2019 : HTTP Method: POST, Resource Path: /
@shantanuo
shantanuo / mongo-ls.js
Created April 24, 2019 05:23 — forked from matteofigus/mongo-ls.js
A script to list all the collections and document count for a specific mongodb db
// Usage: mongo {Server without mongodb:// example 127.0.0.1:27017}/{DbName} [-u {Username}] [-p {Password}] < ./mongo-ls.js
var collections = db.getCollectionNames();
print('Collections inside the db:');
for(var i = 0; i < collections.length; i++){
var name = collections[i];
if(name.substr(0, 6) != 'system')
print(name + ' - ' + db[name].count() + ' records');
@shantanuo
shantanuo / vgg_like_charcnn.py
Created April 1, 2019 06:33
tensorflow toxic comment kaggle competition python code
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.contrib.keras.api.keras.losses import binary_crossentropy
from collections import Counter
@shantanuo
shantanuo / npm.log
Created March 14, 2019 07:53
npm start output
# output of npm start command...
# npm start
> amplify-js-app@1.0.0 start /amplify-js-app
> webpack && webpack-dev-server --mode development
Hash: 7ae7e983cf728aac0aca
Version: webpack 4.29.6
Time: 107ms
Built at: 03/14/2019 7:52:13 AM
@shantanuo
shantanuo / aws_bill.py
Last active February 18, 2019 05:29
Get billing details of last 40 days using boto
import boto3
import datetime
import pandas as pd
import numpy as np
now = datetime.datetime.utcnow()
start = (now - datetime.timedelta(days=40)).strftime("%Y-%m-%d")
end = now.strftime("%Y-%m-%d")
cd = boto3.client("ce",
@shantanuo
shantanuo / tf_learn.py
Created February 10, 2019 07:18
tensorflow code
# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
import pandas as pd
import numpy as np
import re
import tensorflow_hub as hub
import tensorflow as tf
import keras
from tensorflow.python.keras.layers import Input, Dense, Lambda