Skip to content

Instantly share code, notes, and snippets.

View pemagrg1's full-sized avatar
:octocat:
working...

Pema Gurung pemagrg1

:octocat:
working...
View GitHub Profile
@pemagrg1
pemagrg1 / one hot encoding using numpy
Created January 9, 2019 04:31
one hot encoding using numpy
import numpy as np
# Integer-encode a sample sentence: build a sorted vocabulary of its unique
# lowercase tokens, then map every token to its position in that vocabulary.
docs = "Can I eat the Pizza".lower().split()
doc1 = sorted(set(docs))
print("\nvalues: ", doc1)

# Convert the vocabulary to an array once; the original rebuilt it on every
# loop iteration although it never changes.
_vocab_array = np.array(doc1)
integer_encoded = []
for i in docs:
    # np.where returns a tuple of index arrays; [0][0] is the first match.
    v = np.where(_vocab_array == i)[0][0]
    integer_encoded.append(v)
@pemagrg1
pemagrg1 / one hot encoding using sklearn
Created January 9, 2019 04:36
one hot encoding using sklearn
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# Example documents, lowercased up front.
# (An alternative categorical sample would be:
#  ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot'])
doc1, doc2 = (
    sentence.lower()
    for sentence in ("Can I eat the Pizza", "You can eat the Pizza")
)
@pemagrg1
pemagrg1 / one hot encoding using Keras
Created January 9, 2019 04:37
one hot encoding using Keras
from keras.preprocessing.text import Tokenizer
from numpy import array
from numpy import argmax
from keras.utils import to_categorical
# Tokenise the sample sentence: lowercase it first, then split on whitespace.
doc = "Can I eat the Pizza".lower()
doc = doc.split()
def using_Tokenizer(doc):
# create the tokenizer
@pemagrg1
pemagrg1 / one hot encoding using Tensorflow
Created January 10, 2019 10:52
one hot encoding using Tensorflow
import tensorflow as tf
import pandas as pd
# Map each token of the sample sentence to an integer id.
text = 'My cat is a great cat'
tokens = text.lower().split()
# Sort the unique tokens so the ids are reproducible: iterating a bare set of
# strings can yield a different order from one interpreter run to the next.
vocab = sorted(set(tokens))
# Series used as a word -> id lookup table (index: word, value: id).
vocab = pd.Series(range(len(vocab)), index=vocab)
# Label-based lookup of every token (repeats included), as an array of ids.
word_ids = vocab.loc[tokens].values
"""
Get the total of each column's values.
"""
def totalcount(data):
    """Return *data* with a ``Total`` column holding each row's sum.

    An existing ``Total`` column is excluded from the sum (and then
    overwritten), so applying the function twice does not double-count.

    Args:
        data: DataFrame whose rows should be summed.

    Returns:
        The result of ``data.assign(Total=...)``: the original columns plus
        ``Total``.
    """
    # errors='ignore' makes the drop a no-op when there is no 'Total' yet.
    without_total = data.drop(columns='Total', errors='ignore')
    return data.assign(Total=without_total.sum(axis=1))
def pandas_get_total_row(df):
    """Apply ``totalcount`` to *df* and then to the transposed result.

    The first call adds a ``Total`` column. The second call runs on the
    transpose, and the result is transposed back before being returned.
    """
    with_total_column = totalcount(df)
    transposed_with_total = totalcount(with_total_column.T)
    return transposed_with_total.T
def get_total(df, column='Marks'):
    """Append a ``Total`` row holding the sum of one column.

    The frame is modified in place and also returned. Only *column* receives
    a value in the new row; the Series assigned to the row carries no entry
    for any other column.

    Args:
        df: DataFrame containing *column*.
        column: Name of the column to total. Defaults to ``'Marks'``, the
            column name that used to be hard-coded.

    Returns:
        The same DataFrame object, now with an extra row labelled ``'Total'``.

    Raises:
        KeyError: If *column* is not a column of *df*.
    """
    # A one-element Series keyed by the column name, so the assignment is
    # aligned on column labels rather than broadcast across the whole row.
    df.loc['Total'] = pd.Series(df[column].sum(), index=[column])
    return df
# Example: build a small marks table with the columns in a fixed order, then
# append the 'Total' row via get_total.
df = get_total(
    pd.DataFrame(
        {'Subjects': ["Maths", "Science", "English"], 'Marks': [80, 90, 75]}
    ).reindex(columns=['Subjects', 'Marks'])
)
# Bare expression: displays the frame when run in a notebook/REPL cell.
df
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
from sklearn import linear_model
# Placeholder for the project root directory; replace it with a real path
# before running.
Project_path = "<path to the project folder>"
import pickle
# Load the pickled vectorizer and model, then classify one sample sentence.
Project_path = "<path to project>"
model_path = Project_path + "/08. Multi-class_text_classification/models/model.pickle"
vectorizer_path = Project_path + "/08. Multi-class_text_classification/models/vectorizer.pickle"

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load files that come from a trusted source.
# The context managers close both files promptly; the original never closed
# the handles it passed to pickle.load.
with open(vectorizer_path, 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# A one-item list goes in, so the first element of the prediction is taken.
pred = model.predict(vectorizer.transform(["i have got a new phone. its from Apple.. and i love it!"]))[0]
print("predicted class:", pred)
"""
Regex-based check of whether a page is an inner page, a home page, or a category page.
"""
import re
def url_check(url):
url = url.split("/")
url = list(filter(None, url))
if "http" in url[0]:
"""
In audio production, a sample rate (or "sampling rate") defines how many times per second a sound is sampled.
Technically speaking, it is the frequency of samples used in a digital recording.
"""
import numpy as np
from scipy.io import wavfile
# Parameters for the audio example.
sampleRate, frequency, audio_length = (
    100,  # samples taken per second
    10,   # presumably the tone frequency in Hz — confirm against its use
    1,    # duration in seconds
)