Shantanu Oak shantanuo

## lambda_for_redshift.py
# https://aws.amazon.com/blogs/big-data/accessing-external-components-using-amazon-redshift-lambda-udfs/

import json

def lambda_handler(event, context):
  number = str(event["arguments"][0][0])
  import requests
  ret = dict()
  try:
    res = list()

## geodose.py
# https://www.geodose.com/2020/06/how-to-create-coronavirus-time-series-map.html

import pandas as pd
import numpy as np

# Fetch the global COVID-19 deaths time series CSV from the
# CSSEGISandData/COVID-19 repository on GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")

# The first four columns identify a location; every remaining column
# (presumably one per date -- confirm against the CSV) is stacked into rows.
id_columns = df.columns[:4].tolist()
df = df.set_index(id_columns).stack().reset_index()

# Long format: four identifier columns, the former column label, its value.
df.columns = [
    "province",
    "country",
    "lat",
    "lon",
    "date",
    "n_death",
]

## spacy_error.txt
# /root/miniforge3/bin/pip install spacy
Collecting spacy
  Using cached spacy-2.3.1.tar.gz (5.9 MB)
  Installing build dependencies ... error
  ERROR: Command errored out with exit status 1:
   command: /root/miniforge3/bin/python3.7 /root/miniforge3/lib/python3.7/site-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-ahxo0t0p/overlay --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- setuptools wheel 'cython>=0.25' 'cymem>=2.0.2,<2.1.0' 'preshed>=3.0.2,<3.1.0' 'murmurhash>=0.28.0,<1.1.0' thinc==7.4.1
       cwd: None
  Complete output (196 lines):
  Collecting setuptools
    Downloading setuptools-49.1.2-py3-none-any.whl (789 kB)

## BrowserHistory.py
import pandas as pd
import json

# Load the browser-history export from the working directory.
# JSON text is UTF-8, so the encoding is passed explicitly; without it open()
# falls back to the platform default, which can mis-decode or raise on
# non-ASCII entries on systems whose locale encoding is not UTF-8.
with open("BrowserHistory.json", "r", encoding="utf-8") as read_file:
    developer = json.load(read_file)

# Build the frame from the records stored under the "Browser History" key.
df = pd.DataFrame(developer["Browser History"])

# Interpret "time_usec" as microseconds since the Unix epoch and store the
# resulting timestamps in a new "UNIXTIME" column.
df["UNIXTIME"] = pd.to_datetime(df["time_usec"], unit="us")

## bruhadkosh.txt
DELETE bruhadkosh/

PUT bruhadkosh
{  "mappings": {
    "properties": {
      "kosh": {
        "type": "text",
        "fields": {
          "keyword": {
            "type": "keyword"

## out.txt
> amazon-kinesis-video-streams-webrtc@1.0.4 develop /tmp/amazon-kinesis-video-streams-webrtc-sdk-js
> webpack-dev-server --config webpack.dev.config.js

Package version: 1.0.4
Starting type checking service...
ℹ ｢wds｣: Project is running at http://localhost:3001/
ℹ ｢wds｣: webpack output is served from /
ℹ ｢wds｣: Content not from webpack is served from /tmp/amazon-kinesis-video-streams-webrtc-sdk-js/examples
Type checking in progress...
ℹ ｢wdm｣: Hash: 87997616ccb8b085f416

## isitfit.txt

(base) root@080ae74773e0:/# isitfit cost analyze
Profiles in AWS credential file:
- default

(use `AWS_PROFILE=myprofile isitfit ...` or `isitfit command --profile=myprofile ...` to skip this prompt)
Profile to use [default]:
Number of days to lookback (between 1 and 90, use `isitfit cost --ndays=7 ...` to skip this prompt) [7]:
EC2 instances, counting in all regions            : 100%|███████████████████████████████████████████████████████████████████████████████| 18/18 [00:10<00:00,  2.49it/s]
Cloudtrail events in all regions                  : 100%|█████████████████████████████████████████████████████████████████████████████████| 1/1 [00:06<00:00,  6.73s/it]

## dup_strings.py
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans

# Load the 'all_records' sheet of the duplicates workbook into a DataFrame.
df = pd.read_excel('final_dupes_all.xlsx', sheet_name = 'all_records')
# NOTE(review): the column list below is elided with `...` and is not valid
# Python as written (no comma after the ellipsis); restore the full list of
# names before running. The names that are shown carry surrounding spaces
# (' xyz', ' flg_univ ') -- confirm whether that is intentional.
df.columns = [' xyz',  ... ' flg_univ ', ]
# Store the character length of each college_name value in 'mylen'.
# Presumably 'college_name' is one of the elided column names -- TODO confirm.
df['mylen'] = df.college_name.str.len()

## elastic_report.py
import pandas as pd
import numpy as np

import elasticsearch
from elasticsearch import helpers

myquery = 'your kibana query here...'

es_client = elasticsearch.Elasticsearch(
    "https://xxx.us-east-1.es.amazonaws.com"

## cache_example.py
import streamlit as st
import pandas as pd

# Reuse this data across runs!
# Wrap pd.read_csv with st.cache and call the wrapped function below instead
# of pd.read_csv directly.
read_and_cache_csv = st.cache(pd.read_csv)

# Base URL that the labels file name is appended to.
BUCKET = "https://streamlit-self-driving.s3-us-west-2.amazonaws.com/"
# Read only the first 1000 rows of the gzipped labels CSV.
data = read_and_cache_csv(BUCKET + "labels.csv.gz", nrows=1000)
# Let the user choose which label value to filter on.
desired_label = st.selectbox('Filter to:', ['car', 'truck'])
# Show only the rows whose `label` column equals the chosen value.
st.write(data[data.label == desired_label])
	# https://aws.amazon.com/blogs/big-data/accessing-external-components-using-amazon-redshift-lambda-udfs/

	import json

	def lambda_handler(event, context):
	number = str(event["arguments"][0][0])
	import requests
	ret = dict()
	try:
	res = list()
	# https://www.geodose.com/2020/06/how-to-create-coronavirus-time-series-map.html

	import pandas as pd
	import numpy as np

	df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")

	df = df.set_index(list(df.columns[:4]))
	df = df.stack().reset_index()
	df.columns = ["province", "country", "lat", "lon", "date", "n_death"]
	# /root/miniforge3/bin/pip install spacy
	Collecting spacy
	Using cached spacy-2.3.1.tar.gz (5.9 MB)
	Installing build dependencies ... error
	ERROR: Command errored out with exit status 1:
	command: /root/miniforge3/bin/python3.7 /root/miniforge3/lib/python3.7/site-packages/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-ahxo0t0p/overlay --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- setuptools wheel 'cython>=0.25' 'cymem>=2.0.2,<2.1.0' 'preshed>=3.0.2,<3.1.0' 'murmurhash>=0.28.0,<1.1.0' thinc==7.4.1
	cwd: None
	Complete output (196 lines):
	Collecting setuptools
	Downloading setuptools-49.1.2-py3-none-any.whl (789 kB)
	import pandas as pd
	import json

	with open("BrowserHistory.json", "r") as read_file:
	developer = json.load(read_file)

	df = pd.DataFrame(developer["Browser History"])

	df["UNIXTIME"] = pd.to_datetime(df["time_usec"], unit="us")
	DELETE bruhadkosh/

	PUT bruhadkosh
	{ "mappings": {
	"properties": {
	"kosh": {
	"type": "text",
	"fields": {
	"keyword": {
	"type": "keyword"
	> amazon-kinesis-video-streams-webrtc@1.0.4 develop /tmp/amazon-kinesis-video-streams-webrtc-sdk-js
	> webpack-dev-server --config webpack.dev.config.js

	Package version: 1.0.4
	Starting type checking service...
	ℹ ｢wds｣: Project is running at http://localhost:3001/
	ℹ ｢wds｣: webpack output is served from /
	ℹ ｢wds｣: Content not from webpack is served from /tmp/amazon-kinesis-video-streams-webrtc-sdk-js/examples
	Type checking in progress...
	ℹ ｢wdm｣: Hash: 87997616ccb8b085f416

	(base) root@080ae74773e0:/# isitfit cost analyze
	Profiles in AWS credential file:
	- default

	(use `AWS_PROFILE=myprofile isitfit ...` or `isitfit command --profile=myprofile ...` to skip this prompt)
	Profile to use [default]:
	Number of days to lookback (between 1 and 90, use `isitfit cost --ndays=7 ...` to skip this prompt) [7]:
	EC2 instances, counting in all regions : 100%\|███████████████████████████████████████████████████████████████████████████████\| 18/18 [00:10<00:00, 2.49it/s]
	Cloudtrail events in all regions : 100%\|█████████████████████████████████████████████████████████████████████████████████\| 1/1 [00:06<00:00, 6.73s/it]
	import pandas as pd
	import numpy as np

	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.cluster import KMeans

	df = pd.read_excel('final_dupes_all.xlsx', sheet_name = 'all_records')
	df.columns = [' xyz', ... ' flg_univ ', ]
	df['mylen'] = df.college_name.str.len()
	import pandas as pd
	import numpy as np

	import elasticsearch
	from elasticsearch import helpers

	myquery = 'your kibana query here...'

	es_client = elasticsearch.Elasticsearch(
	"https://xxx.us-east-1.es.amazonaws.com"
	import streamlit as st
	import pandas as pd

	# Reuse this data across runs!
	read_and_cache_csv = st.cache(pd.read_csv)

	BUCKET = "https://streamlit-self-driving.s3-us-west-2.amazonaws.com/"
	data = read_and_cache_csv(BUCKET + "labels.csv.gz", nrows=1000)
	desired_label = st.selectbox('Filter to:', ['car', 'truck'])
	st.write(data[data.label == desired_label])