Skip to content

Instantly share code, notes, and snippets.

@vermasachin
Created August 3, 2018 18:01
Show Gist options
  • Save vermasachin/a918cb44210c69c37ff183e9322471ce to your computer and use it in GitHub Desktop.
Save vermasachin/a918cb44210c69c37ff183e9322471ce to your computer and use it in GitHub Desktop.
Counts and prints the number of occurrences of each lowercase letter of the alphabet (a-z) in the visible text of a web page
import string
import urllib
import urllib.request

from bs4 import BeautifulSoup
from requests import get
# Page whose visible text will be analysed.
myurl = "https://sachinverma.net/"

# Read the whole response inside a context manager so the HTTP connection is
# closed as soon as the body has been fetched. `soup` keeps its original name
# (later code passes it to clean_me) and now holds the raw HTML bytes, which
# BeautifulSoup parses the same way as an open response object.
with urllib.request.urlopen(myurl) as response:
    soup = response.read()
def clean_me(html):
    """Extract the visible text of an HTML document.

    Parses ``html`` with BeautifulSoup's ``html.parser``, removes every
    ``<script>`` and ``<style>`` element, and joins the remaining
    whitespace-stripped text fragments with single spaces.

    Args:
        html: Markup to parse; anything the ``BeautifulSoup`` constructor
            accepts.

    Returns:
        The extracted text. The same value is also stored in the
        module-level ``mytext`` variable, which existing callers read.
    """
    global mytext
    # A distinct local name avoids shadowing the module-level ``soup``.
    parsed = BeautifulSoup(html, 'html.parser')
    for tag in parsed(['script', 'style']):
        # decompose() removes the element and its contents from the tree,
        # so inline JS/CSS never reaches the extracted text.
        tag.decompose()
    mytext = ' '.join(parsed.stripped_strings)
    return mytext
# clean_me() publishes the page's visible text in the global `mytext`.
clean_me(soup)

# One zero-initialised counter per lowercase ASCII letter, in a-z order.
mydict = dict.fromkeys(string.ascii_lowercase, 0)

# Tally every character that is one of the tracked letters; a dict membership
# test replaces scanning all 26 keys for each character.
# NOTE(review): characters are compared as-is, so uppercase letters are not
# counted — confirm that is intended.
for a in mytext:
    if a in mydict:
        mydict[a] += 1
print(mydict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment