Skip to content

Instantly share code, notes, and snippets.

View Venkatstatistics's full-sized avatar

Venkat Venkatstatistics

  • Aryma Labs
  • Bangalore
View GitHub Profile
#Resume Phrase Matcher code
#importing all required libraries
import PyPDF2
import os
from os import listdir
from os.path import isfile, join
from io import StringIO
FROM rocker/r-ver:devel
RUN apt-get update && apt-get install -y \
sudo \
gdebi-core \
pandoc \
pandoc-citeproc \
libcurl4-gnutls-dev \
libcairo2-dev \
libxt-dev \
# -*- coding: utf-8 -*-
from gensim.models.word2vec import Word2Vec
import gensim.downloader as api
# Train a Word2Vec model on the 'text8' dataset obtained through gensim's
# downloader API. The commented-out lines are alternative datasets/models
# that were tried and left disabled.
#corpus = api.load('word2vec-google-news-300')
#corpus = api.load('glove-wiki-gigaword-100')
#model = api.load('glove-wiki-gigaword-100')
corpus = api.load('text8') # download the corpus and return it opened as an iterable
model = Word2Vec(corpus) # train a model from the corpus
@Venkatstatistics
Venkatstatistics / Recommender engine
Last active July 13, 2020 09:27
Recommender Engine - Under the hood
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# Load the item catalogue to build recommendations from; plug in your own
# CSV of products, movies or books here.
ds = pd.read_csv("test1.csv")
# Word-level TF-IDF over unigrams, bigrams and trigrams with English stop
# words removed. min_df=1 keeps exactly the same terms as the former
# min_df=0 (every extracted term occurs in at least one document), but an
# integer 0 is rejected by scikit-learn's parameter validation in recent
# releases.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
#ngram explanation begins#
#ngram (1,3) can be explained as follows#
#ngram_range=(1, 3) covers unigrams, bigrams and trigrams
#consider the sentence "The ball fell"
###Spacy Tutorials###
## References: https://course.spacy.io/chapter1 ##
## References: https://spacy.io/usage/spacy-101 ##
### Learning to work with NLP object ###
from spacy.lang.en import English
# Instantiate the English language class imported above and keep it as `nlp`.
nlp = English ()
import spacy
from spacy.lang.en import English
# First pipeline: an English() instance with a 'sentencizer' component
# created from it and then attached to it.
# NOTE(review): create_pipe() followed by add_pipe(<component object>) looks
# like the spaCy v2 calling convention - confirm the pinned spaCy version.
nlpsm = English()
sbd = nlpsm.create_pipe('sentencizer')
nlpsm.add_pipe(sbd)
import en_vectors_web_lg
# Second pipeline: loaded from the en_vectors_web_lg package; the same
# sentencizer object created above is added to it as well.
nlplg = en_vectors_web_lg.load()
nlplg.add_pipe(sbd)
# -*- coding: utf-8 -*-
# Lowercasing: normalise every name in the sample list to lower case.
texts = ["JOHN", "keLLY", "ArJUN", "SITA"]
lower_words = list(map(str.lower, texts))
# Bare expression so the result is displayed when run in a notebook/REPL cell.
lower_words
#Stemming
import nltk
import pandas as pd
ID,Book Title
1,Probabilistic Graphical Models
2,Bayesian Data Analysis
3,Doing data science
4,Pattern Recognition and Machine Learning
5,The Elements of Statistical Learning
6,An introduction to Statistical Learning
7,Python Machine Learning
8,Natural Language Processing with Python
9,Statistical Distributions
import time
start = time.time()
import csv
import sys
import pandas as pd
import numpy as np
from operator import itemgetter
import redis
def dump():
    """Write every entry of the 'results' list to results.csv, one per line.

    Each entry is echoed to stdout as read, then decoded as UTF-8 and written
    to the file followed by a newline. Relies on a name ``r`` defined
    elsewhere in the file (presumably a redis client, since ``lrange`` is
    called on it - confirm against the rest of the script).
    """
    # The entries are decoded as UTF-8, so pin the output encoding as well;
    # the platform default could fail on non-ASCII text.
    with open('results.csv', 'w', encoding='utf-8') as f:
        for key in r.lrange('results', 0, -1):
            print(key)
            f.write(key.decode('utf-8'))
            f.write('\n')
if __name__== "__main__":