# Pema Gurung pemagrg1

## one hot encoding using numpy
import numpy as np
docs = "Can I eat the Pizza".lower().split()
# Vocabulary: the distinct tokens in sorted order, so each token has a fixed index.
doc1 = sorted(set(docs))
print("\nvalues: ", doc1)

# Build the lookup array once instead of once per token.
vocab_array = np.array(doc1)
# Integer-encode every token as its position in the sorted vocabulary.
integer_encoded = [np.where(vocab_array == token)[0][0] for token in docs]

## one hot encoding using sklearn
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# define example
# data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']


# Two lower-cased sample sentences for the sklearn example.
doc1, doc2 = (
    sentence.lower()
    for sentence in ("Can I eat the Pizza", "You can eat the Pizza")
)

## one hot encoding using Keras
from keras.preprocessing.text import Tokenizer
from numpy import array
from numpy import argmax
from keras.utils import to_categorical


# Sample sentence as a list of lower-cased, whitespace-separated tokens.
doc = [word.lower() for word in "Can I eat the Pizza".split()]

def using_Tokenizer(doc):
    # create the tokenizer

## one hot encoding using Tensorflow
import tensorflow as tf
import pandas as pd

text = 'My cat is a great cat'
tokens = text.lower().split()

# Sort the distinct tokens so each word gets the same id on every run; a bare
# set of strings iterates in hash order, which differs between interpreter runs.
vocab = sorted(set(tokens))
vocab = pd.Series(range(len(vocab)), index=vocab)

# Look up the id of every token, in sentence order (repeated words repeat their id).
word_ids = vocab.loc[tokens].values

## pandas_get_total_row.py
"""
Add a 'Total' column of row sums and a 'Total' row of column sums to a DataFrame.
"""
def totalcount(data):
    """Return ``data`` with a ``Total`` column holding the sum of each row.

    An existing ``Total`` column is excluded from the sum, so applying this to
    an already-totalled frame recomputes the column rather than double counting.
    The result comes from ``assign``; ``data`` itself is not modified.
    """
    summands = data.drop(columns='Total', errors='ignore')
    row_sums = summands.sum(axis=1)
    return data.assign(Total=row_sums)

def pandas_get_total_row(df):
    """Return ``df`` extended with a ``Total`` column and a ``Total`` row.

    ``totalcount`` is applied once to add the per-row totals, then once more
    to the transposed frame so the same sums are taken along the other axis;
    the final transpose restores the original orientation.
    """
    with_total_column = totalcount(df)
    with_total_row = totalcount(with_total_column.T)
    return with_total_row.T

## pandas_calculate_total.py

def get_total(df):
    """Append a ``Total`` row containing the sum of the ``Marks`` column.

    The row is written into ``df`` itself and that same object is returned.
    Only ``Marks`` is supplied for the new row, so its other columns are left
    without a value.
    """
    marks_total = df['Marks'].sum()
    df.loc['Total'] = pd.Series(marks_total, index=['Marks'])
    return df

# Worked example: three subjects with their marks, then the appended 'Total' row.
df = pd.DataFrame(
    {'Subjects': ["Maths", "Science", "English"], 'Marks': [80, 90, 75]}
).reindex(columns=['Subjects', 'Marks'])
df = get_total(df)
# Bare expression; presumably kept so a notebook/REPL cell displays the frame.
df

## sklearn-model-to-pickle.py
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
from sklearn import linear_model
# Placeholder value: replace with the path of the project folder before running.
Project_path = "<path to the project folder>"


## load-sklearn-pickle-and-predict.py
import pickle

# Placeholder value: replace with the path of the project folder before running.
Project_path = "<path to project>"
model_path = Project_path + "/08. Multi-class_text_classification/models/model.pickle"
vectorizer_path = Project_path + "/08. Multi-class_text_classification/models/vectorizer.pickle"

# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
# The context managers close each file as soon as its object has been loaded.
with open(vectorizer_path, 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Vectorise one sample sentence and keep the first (only) prediction.
pred = model.predict(vectorizer.transform(["i have got a new phone. its from Apple.. and i love it!"]))[0]
print("predicted class:", pred)

## url_check
"""
Regex-based check of whether a URL is an inner page, the home page, or a category page.
"""
import re


def url_check(url):
    url = url.split("/")
    url = list(filter(None, url))
    if "http" in url[0]:

## generating_sinewaves.py
"""
In audio production, a sample rate (or "sampling rate") defines how many times per second a sound is sampled.
Technically speaking, it is the frequency of samples used in a digital recording.
"""
import numpy as np
from scipy.io import wavfile

# Samples per second, per the sample-rate note above.
sampleRate = 100
# NOTE(review): presumably the sine-wave frequency in Hz; confirm against the code that uses it.
frequency = 10
audio_length = 1 #second
	import numpy as np
	docs = "Can I eat the Pizza".lower().split()
	# Vocabulary: the distinct tokens in sorted order, so each token has a fixed index.
	doc1 = sorted(set(docs))
	print("\nvalues: ", doc1)

	# Build the lookup array once instead of once per token.
	vocab_array = np.array(doc1)
	# Integer-encode every token as its position in the sorted vocabulary.
	integer_encoded = [np.where(vocab_array == token)[0][0] for token in docs]
	from numpy import array
	from numpy import argmax
	from sklearn.preprocessing import LabelEncoder
	from sklearn.preprocessing import OneHotEncoder
	# define example
	# data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']


	# Two lower-cased sample sentences for the sklearn example.
	doc1, doc2 = (
		sentence.lower()
		for sentence in ("Can I eat the Pizza", "You can eat the Pizza")
	)
	from keras.preprocessing.text import Tokenizer
	from numpy import array
	from numpy import argmax
	from keras.utils import to_categorical


	# Sample sentence as a list of lower-cased, whitespace-separated tokens.
	doc = [word.lower() for word in "Can I eat the Pizza".split()]

	def using_Tokenizer(doc):
	# create the tokenizer
	import tensorflow as tf
	import pandas as pd

	text = 'My cat is a great cat'
	tokens = text.lower().split()

	# Sort the distinct tokens so each word gets the same id on every run; a bare
	# set of strings iterates in hash order, which differs between interpreter runs.
	vocab = sorted(set(tokens))
	vocab = pd.Series(range(len(vocab)), index=vocab)

	# Look up the id of every token, in sentence order (repeated words repeat their id).
	word_ids = vocab.loc[tokens].values
	"""
	Add a 'Total' column of row sums and a 'Total' row of column sums to a DataFrame.
	"""
	def totalcount(data):
		"""Return ``data`` with a ``Total`` column holding the sum of each row.

		An existing ``Total`` column is excluded from the sum, so applying this
		to an already-totalled frame recomputes the column rather than double
		counting. The result comes from ``assign``; ``data`` is not modified.
		"""
		summands = data.drop(columns='Total', errors='ignore')
		return data.assign(Total=summands.sum(axis=1))

	def pandas_get_total_row(df):
		"""Return ``df`` extended with a ``Total`` column and a ``Total`` row.

		``totalcount`` is applied once to add the per-row totals, then once more
		to the transposed frame so the same sums are taken along the other axis;
		the final transpose restores the original orientation.
		"""
		df = df.pipe(totalcount).T.pipe(totalcount).T
		return df

	def get_total(df):
		"""Append a ``Total`` row containing the sum of the ``Marks`` column.

		The row is written into ``df`` itself and that same object is returned.
		Only ``Marks`` is supplied for the new row, so its other columns are
		left without a value.
		"""
		df.loc['Total'] = pd.Series(df['Marks'].sum(), index=['Marks'])
		return df

	# Worked example: three subjects with their marks, then the appended 'Total' row.
	df = pd.DataFrame(
		{'Subjects': ["Maths", "Science", "English"], 'Marks': [80, 90, 75]}
	).reindex(columns=['Subjects', 'Marks'])
	df = get_total(df)
	# Bare expression; presumably kept so a notebook/REPL cell displays the frame.
	df
	from sklearn.feature_extraction.text import TfidfVectorizer
	import pandas as pd
	from sklearn import svm
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score
	import pickle
	from sklearn import linear_model
	# Placeholder value: replace with the path of the project folder before running.
	Project_path = "<path to the project folder>"
	import pickle

	# Placeholder value: replace with the path of the project folder before running.
	Project_path = "<path to project>"
	model_path = Project_path + "/08. Multi-class_text_classification/models/model.pickle"
	vectorizer_path = Project_path + "/08. Multi-class_text_classification/models/vectorizer.pickle"

	# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
	# The context managers close each file as soon as its object has been loaded.
	with open(vectorizer_path, 'rb') as vectorizer_file:
		vectorizer = pickle.load(vectorizer_file)
	with open(model_path, 'rb') as model_file:
		model = pickle.load(model_file)

	# Vectorise one sample sentence and keep the first (only) prediction.
	pred = model.predict(vectorizer.transform(["i have got a new phone. its from Apple.. and i love it!"]))[0]
	print("predicted class:", pred)
	"""
	Regex-based check of whether a URL is an inner page, the home page, or a category page.
	"""
	import re


	def url_check(url):
	url = url.split("/")
	url = list(filter(None, url))
	if "http" in url[0]:
	"""
	In audio production, a sample rate (or "sampling rate") defines how many times per second a sound is sampled.
	Technically speaking, it is the frequency of samples used in a digital recording.
	"""
	import numpy as np
	from scipy.io import wavfile

	# Samples per second, per the sample-rate note above.
	sampleRate = 100
	# NOTE(review): presumably the sine-wave frequency in Hz; confirm against the code that uses it.
	frequency = 10
	audio_length = 1 #second