Zaynaib (Ola) Giwa zaynaib

## gist:71ba9a3dfb83cdd9f808
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Download the page at `url` and collect the rows of its second <table>.
url = "https://www.akc.org/reg/dogreg_stats.cfm"
r = requests.get(url)
# Stop here with a clear HTTP error instead of parsing an error page and
# failing later with an opaque IndexError on the table lookup.
r.raise_for_status()
data = r.text
# Name the parser explicitly: without it BeautifulSoup uses whichever
# parser happens to be installed (and emits a warning), so the parsed tree
# can differ from one machine to the next.
soup = BeautifulSoup(data, "html.parser")

# find_all() returns matches in document order; index 1 is the second table.
table = soup.find_all('table')[1]
rows = table.find_all('tr')

## tf_idf.py
import glob
import math
line = ''
s = set()
# Get all the .txt files from the corpus directory.
flist = glob.glob(r'E:\PROGRAMMING\PYTHON\programs\corpus2\*.txt')
# Open each file, tokenize its content, and add the tokens to the set `s`.
for fname in flist:
    # `with` closes the file even when read() raises.
    with open(fname, "r") as tfile:
        line = tfile.read()  # whole content of the current file
    # split() with no argument splits on any run of whitespace, so newlines
    # and tabs separate tokens and no empty-string tokens are produced.
    # split(' ') kept "word\nword" as one token and yielded '' for
    # consecutive spaces.
    s.update(line.split())

## tfidf.py
import math
from text.blob import TextBlob as tb

def tf(word, blob):
    """Return the term frequency of ``word`` in ``blob``.

    The result is the number of occurrences of ``word`` in ``blob.words``
    divided by the number of entries in ``blob.words``.

    Args:
        word: The term to count.
        blob: An object exposing a ``words`` sequence that supports
            ``count()`` and ``len()``.

    Returns:
        The fraction of ``blob.words`` equal to ``word`` as a float, or
        ``0.0`` when ``blob.words`` is empty.
    """
    words = blob.words
    total = len(words)
    if not total:
        # An empty document contains no terms; avoid ZeroDivisionError.
        return 0.0
    # float() forces true division even where `/` on two ints truncates.
    return words.count(word) / float(total)

def n_containing(word, bloblist):
    """Count the documents in ``bloblist`` that contain ``word`` as a token.

    Membership is tested against each document's ``words`` sequence, the
    same sequence ``tf`` counts in. Testing ``word in blob`` directly on a
    TextBlob is a substring test on its text, so "cat" would also match a
    document that only contains "category".

    Args:
        word: The term to look for.
        bloblist: An iterable of documents. A document without a ``words``
            attribute (for example a plain list of tokens) is tested
            directly with ``in``.

    Returns:
        The number of documents containing ``word``; ``0`` for an empty
        ``bloblist``.
    """
    return sum(
        1 for blob in bloblist if word in getattr(blob, "words", blob)
    )

def idf(word, bloblist):

## remove-stopwords.py
# Given a list of words, remove any that are
# in a list of stop words.

def removeStopwords(wordlist, stopwords):
    """Return the words of ``wordlist`` that are not in ``stopwords``.

    Order and duplicates of the kept words are preserved.

    Args:
        wordlist: An iterable of words (expected to be hashable, e.g. strings).
        stopwords: An iterable of words to drop.

    Returns:
        A new list with every stop word removed.
    """
    # Materialize the stop words once: membership tests become O(1) instead
    # of a linear scan per word, and a one-shot iterator is not consumed
    # piecemeal by successive `in` tests.
    try:
        blocked = frozenset(stopwords)
    except TypeError:
        # Unhashable stop-word entries: keep list-style membership.
        blocked = list(stopwords)
    return [w for w in wordlist if w not in blocked]

## tf_idf.py
import glob
import math
line = ''
s = set()
# Get all the .txt files from the corpus directory.
flist = glob.glob(r'E:\PROGRAMMING\PYTHON\programs\corpus2\*.txt')
# Open each file, tokenize its content, and add the tokens to the set `s`.
for fname in flist:
    # `with` closes the file even when read() raises.
    with open(fname, "r") as tfile:
        line = tfile.read()  # whole content of the current file
    # split() with no argument splits on any run of whitespace, so newlines
    # and tabs separate tokens and no empty-string tokens are produced.
    # split(' ') kept "word\nword" as one token and yielded '' for
    # consecutive spaces.
    s.update(line.split())

## idf.py
from string import digits
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
import glob
import math
import csv
import string
from collections import Counter

## twitterCloud
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Search Twitter for the hashtag, passing n = 1000 as the result count.
midnight <- searchTwitter("#MakeAMovieSmarter",  n = 1000)
# Call getText() on every returned item to collect the texts.
midnight_text = sapply(midnight, function(x) x$getText())
# Wrap the collected texts in a Corpus built from a VectorSource.
midnight_corpus = Corpus(VectorSource(midnight_text))

tdm = TermDocumentMatrix(

## Wikipedia Viewer.markdown

      
              5 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zaynaib
                / Wikipedia Viewer.markdown
            
            
              Last active
              August 29, 2015 14:26
            
              
                doaLMG
              
          
    doaLMG

A Pen by OG on CodePen.
License.

  
## Calculator.markdown

      
              3 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zaynaib
                / Calculator.markdown
            
            
              Created
              August 17, 2015 17:13
            
              
                Calculator
              
          
    Calculator

A Pen by OG on CodePen.
License.

  
## Twitch.markdown

      
              5 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                zaynaib
                / Twitch.markdown
            
            
              Last active
              August 29, 2015 14:27
            
              
                Twitch
              
          
    Twitch

A Pen by OG on CodePen.
License.
	from bs4 import BeautifulSoup
	import requests
	import pandas as pd
	# Download the page at `url` and collect the rows of its second <table>.
	url = "https://www.akc.org/reg/dogreg_stats.cfm"
	r = requests.get(url)
	# Stop here with a clear HTTP error instead of parsing an error page and
	# failing later with an opaque IndexError on the table lookup.
	r.raise_for_status()
	data = r.text
	# Name the parser explicitly: without it BeautifulSoup uses whichever
	# parser happens to be installed (and emits a warning), so the parsed tree
	# can differ from one machine to the next.
	soup = BeautifulSoup(data, "html.parser")

	# find_all() returns matches in document order; index 1 is the second table.
	table = soup.find_all('table')[1]
	rows = table.find_all('tr')
	import glob
	import math
	line = ''
	s = set()
	# Get all the .txt files from the corpus directory.
	flist = glob.glob(r'E:\PROGRAMMING\PYTHON\programs\corpus2\*.txt')
	# Open each file, tokenize its content, and add the tokens to the set `s`.
	for fname in flist:
		# `with` closes the file even when read() raises.
		with open(fname, "r") as tfile:
			line = tfile.read()  # whole content of the current file
		# split() with no argument splits on any run of whitespace, so newlines
		# and tabs separate tokens and no empty-string tokens are produced.
		# split(' ') kept "word\nword" as one token and yielded '' for
		# consecutive spaces.
		s.update(line.split())
	import math
	from text.blob import TextBlob as tb

	def tf(word, blob):
		"""Return the term frequency of ``word`` in ``blob``.

		The result is the number of occurrences of ``word`` in ``blob.words``
		divided by the number of entries in ``blob.words``.

		Args:
			word: The term to count.
			blob: An object exposing a ``words`` sequence that supports
				``count()`` and ``len()``.

		Returns:
			The fraction of ``blob.words`` equal to ``word`` as a float, or
			``0.0`` when ``blob.words`` is empty.
		"""
		words = blob.words
		total = len(words)
		if not total:
			# An empty document contains no terms; avoid ZeroDivisionError.
			return 0.0
		# float() forces true division even where `/` on two ints truncates.
		return words.count(word) / float(total)

	def n_containing(word, bloblist):
		"""Count the documents in ``bloblist`` that contain ``word`` as a token.

		Membership is tested against each document's ``words`` sequence, the
		same sequence ``tf`` counts in. Testing ``word in blob`` directly on a
		TextBlob is a substring test on its text, so "cat" would also match a
		document that only contains "category".

		Args:
			word: The term to look for.
			bloblist: An iterable of documents. A document without a ``words``
				attribute (for example a plain list of tokens) is tested
				directly with ``in``.

		Returns:
			The number of documents containing ``word``; ``0`` for an empty
			``bloblist``.
		"""
		return sum(
			1 for blob in bloblist if word in getattr(blob, "words", blob)
		)

	def idf(word, bloblist):
	from string import digits
	import nltk
	from nltk import word_tokenize
	from nltk.corpus import stopwords
	import glob
	import math
	import csv
	import string
	from collections import Counter
	library(twitteR)
	library(tm)
	library(wordcloud)
	library(RColorBrewer)

	# Search Twitter for the hashtag, passing n = 1000 as the result count.
	midnight <- searchTwitter("#MakeAMovieSmarter", n = 1000)
	# Call getText() on every returned item to collect the texts.
	midnight_text = sapply(midnight, function(x) x$getText())
	# Wrap the collected texts in a Corpus built from a VectorSource.
	midnight_corpus = Corpus(VectorSource(midnight_text))

	tdm = TermDocumentMatrix(