Shantanu Oak shantanuo

## tf_learn.py
# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
import pandas as pd
import numpy as np
import re

import tensorflow_hub as hub
import tensorflow as tf
import keras
from tensorflow.python.keras.layers import Input, Dense, Lambda

## excel_diff_simple.py
# Columns compared when looking for duplicates; 'version' is left out so the
# same record present in both workbooks counts as a duplicate.
key_columns = ["account number", "name", "street", "city", "state", "postal code"]

# Load 'Sheet1' of each workbook (treating 'NA' as missing) and tag every row
# with the workbook it came from.
old = pd.read_excel('../data/sample-address-1.xlsx', 'Sheet1', na_values=['NA'])
new = pd.read_excel('../data/sample-address-2.xlsx', 'Sheet1', na_values=['NA'])
new['version'] = "new"
old['version'] = "old"

# Stack both frames, then discard every row whose key columns occur more than
# once (keep=False); what remains are rows whose key combination is unique.
all_data = pd.concat([old, new], axis='rows', ignore_index=True)
indexed = all_data.reset_index()
changes = indexed.drop_duplicates(subset=key_columns, keep=False)

# Distinct account numbers among the remaining rows.
problem = changes['account number'].unique()

## medium_claps.py
import requests
import json

def clean_json_response(response):
    """Decode the JSON payload of a response guarded by ``])}while(1);</x>``.

    The guard marker is removed and the text that follows it is parsed as
    JSON. If the marker does not occur, the whole body is parsed instead.

    Args:
        response: Any object exposing the body as a ``text`` attribute
            (for example the result of ``requests.get``).

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the text after the marker is not valid JSON.
    """
    # partition() cuts at the first marker only, so a payload that itself
    # contains the marker text stays whole (split(...)[1] would truncate it).
    _, marker, payload = response.text.partition('])}while(1);</x>')
    if not marker:
        # No guard present: parse the body as-is rather than raising IndexError.
        payload = response.text
    return json.loads(payload)

# Endpoint queried below; '?format=json' asks for a JSON body.
url = 'https://medium.com/@user/has-recommended?format=json'
mylist = []
# requests applies no timeout by default, so without one a stalled connection
# would block the script indefinitely.
response = requests.get(url, timeout=30)
# Fail here on 4xx/5xx instead of later with a confusing parse error.
response.raise_for_status()
response_dict = clean_json_response(response)

## load_data.py
############Load libraries#####################################################
import cv2
import numpy as np
import os
from keras.utils import np_utils
###############################################################################
#cross-validation at the patient level
# Absolute paths of the training and validation data directories.
train_data_dir, valid_data_dir = (
    '/home/ec2-user/SageMaker/malaria-detection-model/malaria/training',
    '/home/ec2-user/SageMaker/malaria-detection-model/malaria/validation',
)
###############################################################################

## test.csv

          
            5432
            some_code
            case0
            this is ok
             6
             20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

            
              5432
              some_code
              case0
              this is ok
               6
               20181201031613

## test.csv
"%_first_name", "Very_long_column_names_with_CAPITAL_LetteRS_and_also_%$#@!", "Very_long_column_names_with_CAPITAL_LetteRS", "Very_long_column_names"
"shantanu", "Mumbai", "Maharashtra", "India"
"Amar", "Delhi", "Delhi", "India"
"Akbar", "Madras", "Tamil Nadu" , "India"

## marathi_nlp.py
! git clone https://github.com/cltk/marathi_text_wikisource.git

from collections import Counter
from nltk import word_tokenize
import pandas as pd
import os

fdist = Counter()

import os

## hunspell.py
docker run -it -v /tmp/:/usr/src/myapp -w /usr/src/myapp python:3 bash
apt-get update
apt-get install python-dev
apt-get install libhunspell-dev
pip install hunspell

import hunspell
# Build a checker from the dictionary/affix pair, ask it for the stems of one
# word, and print each stem after decoding it from UTF-8.
hobj = hunspell.HunSpell('mr_IN1.dic', 'mr_IN1.aff')
stems = hobj.stem('घोड्याचा')
for raw_stem in stems:
    print(raw_stem.decode('utf-8'))

## nlp.py
## install
# apt-get update && apt-get install -y \
        build-essential \
        wget \
        git \
        python-dev \
        unzip \
        python-numpy \
        python-scipy \
        && rm -rf /var/cache/apk/*

## github_trending.py
>>> import urllib.request
>>> import bs4
>>> github_trending = urllib.request.urlopen("https://github.com/trending")
>>> trending_soup = bs4.BeautifulSoup(github_trending.read(), "lxml")
>>> ordered_list = trending_soup.find('ol') #single element
>>> for each_children in ordered_list.children:
...     h3 = each_children.find('h3')
...     if not isinstance(h3, int):
...         print(h3.text.strip())
	# https://towardsdatascience.com/transfer-learning-using-elmo-embedding-c4a7e415103c
	import pandas as pd
	import numpy as np
	import re

	import tensorflow_hub as hub
	import tensorflow as tf
	import keras
	from tensorflow.python.keras.layers import Input, Dense, Lambda
	# Columns compared when looking for duplicates; 'version' is left out so the
	# same record present in both workbooks counts as a duplicate.
	key_columns = ["account number", "name", "street", "city", "state", "postal code"]

	# Load 'Sheet1' of each workbook (treating 'NA' as missing) and tag every row
	# with the workbook it came from.
	old = pd.read_excel('../data/sample-address-1.xlsx', 'Sheet1', na_values=['NA'])
	new = pd.read_excel('../data/sample-address-2.xlsx', 'Sheet1', na_values=['NA'])
	new['version'] = "new"
	old['version'] = "old"

	# Stack both frames, then discard every row whose key columns occur more than
	# once (keep=False); what remains are rows whose key combination is unique.
	all_data = pd.concat([old, new], axis='rows', ignore_index=True)
	indexed = all_data.reset_index()
	changes = indexed.drop_duplicates(subset=key_columns, keep=False)

	# Distinct account numbers among the remaining rows.
	problem = changes['account number'].unique()
	import requests
	import json

	def clean_json_response(response):
	    """Decode the JSON payload of a response guarded by ``])}while(1);</x>``.

	    The guard marker is removed and the text that follows it is parsed as
	    JSON. If the marker does not occur, the whole body is parsed instead.

	    Args:
	        response: Any object exposing the body as a ``text`` attribute
	            (for example the result of ``requests.get``).

	    Returns:
	        The decoded JSON value.

	    Raises:
	        json.JSONDecodeError: If the text after the marker is not valid JSON.
	    """
	    # partition() cuts at the first marker only, so a payload that itself
	    # contains the marker text stays whole (split(...)[1] would truncate it).
	    _, marker, payload = response.text.partition('])}while(1);</x>')
	    if not marker:
	        # No guard present: parse the body as-is rather than raising IndexError.
	        payload = response.text
	    return json.loads(payload)

	# Endpoint queried below; '?format=json' asks for a JSON body.
	url = 'https://medium.com/@user/has-recommended?format=json'
	mylist = []
	# requests applies no timeout by default, so without one a stalled connection
	# would block the script indefinitely.
	response = requests.get(url, timeout=30)
	# Fail here on 4xx/5xx instead of later with a confusing parse error.
	response.raise_for_status()
	response_dict = clean_json_response(response)
	############Load libraries#####################################################
	import cv2
	import numpy as np
	import os
	from keras.utils import np_utils
	###############################################################################
	#cross-validation at the patient level
	# Absolute paths of the training and validation data directories.
	train_data_dir, valid_data_dir = (
	    '/home/ec2-user/SageMaker/malaria-detection-model/malaria/training',
	    '/home/ec2-user/SageMaker/malaria-detection-model/malaria/validation',
	)
	###############################################################################
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
5432	some_code	case0	this is ok	6	20181201031613
	"%_first_name", "Very_long_column_names_with_CAPITAL_LetteRS_and_also_%$#@!", "Very_long_column_names_with_CAPITAL_LetteRS", "Very_long_column_names"
	"shantanu", "Mumbai", "Maharashtra", "India"
	"Amar", "Delhi", "Delhi", "India"
	"Akbar", "Madras", "Tamil Nadu" , "India"
	! git clone https://github.com/cltk/marathi_text_wikisource.git

	from collections import Counter
	from nltk import word_tokenize
	import pandas as pd
	import os

	fdist = Counter()

	import os
	docker run -it -v /tmp/:/usr/src/myapp -w /usr/src/myapp python:3 bash
	apt-get update
	apt-get install python-dev
	apt-get install libhunspell-dev
	pip install hunspell

	import hunspell
	# Build a checker from the dictionary/affix pair, ask it for the stems of one
	# word, and print each stem after decoding it from UTF-8.
	hobj = hunspell.HunSpell('mr_IN1.dic', 'mr_IN1.aff')
	for x in hobj.stem('घोड्याचा'):
	    # The loop body must be indented under the `for`; at the same level as
	    # the header it is an IndentationError.
	    print(x.decode('utf-8'))
	## install
	# apt-get update && apt-get install -y \
	build-essential \
	wget \
	git \
	python-dev \
	unzip \
	python-numpy \
	python-scipy \
	&& rm -rf /var/cache/apk/*
	>>> import urllib.request
	>>> import bs4
	>>> github_trending = urllib.request.urlopen("https://github.com/trending")
	>>> trending_soup = bs4.BeautifulSoup(github_trending.read(), "lxml")
	>>> ordered_list = trending_soup.find('ol') #single element
	>>> for each_children in ordered_list.children:
	... h3 = each_children.find('h3')
	... if not isinstance(h3, int):
	... print(h3.text.strip())