Skip to content

Instantly share code, notes, and snippets.

@nveenverma
Created June 14, 2019 07:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nveenverma/d3dab2f2060b63aba839eae8b8cb1bd1 to your computer and use it in GitHub Desktop.
Save nveenverma/d3dab2f2060b63aba839eae8b8cb1bd1 to your computer and use it in GitHub Desktop.
def error_checking(list_name, length):
    """Report when an extracted bs4 result does not have the expected size.

    Args:
        list_name: The extracted collection to check (anything that supports
            ``len()``). It is also interpolated into the printed message.
        length: The number of items the collection is expected to hold.

    Returns:
        ``-1`` after printing an error message when the sizes differ,
        otherwise ``None``.
    """
    # Guard clause: nothing to report when the size is as expected.
    if len(list_name) == length:
        return None

    message = "Error in {} parsing, length not equal to {}!!!".format(list_name, length)
    print(message)
    return -1
def get_top_languages(url, *, expected_count=36, timeout=30):
    """Scrape language tags and their counts from a page into a DataFrame.

    Args:
        url: Address of the page to download.
        expected_count: Number of tags, and of counts, expected on the page.
            A mismatch is reported through ``error_checking`` but does not
            stop processing. Defaults to 36, the value that used to be
            hard-coded.
        timeout: Seconds ``requests`` waits on the server before giving up,
            so a stalled connection cannot block the caller indefinitely.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Languages'`` (text of each
        tag link) and ``'Tag Count'`` (counts converted to ``int``).

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        ValueError: If a count cannot be converted to ``int``, or if the two
            extracted lists differ in length when building the DataFrame.
    """
    # Download the page; the timeout keeps a stalled server from hanging us.
    response = requests.get(url, timeout=timeout)

    # Parse the HTML data using BeautifulSoup's built-in parser.
    soup = bs(response.content, 'html.parser')

    # Fall back to the whole document when the markup has no <body> element,
    # rather than failing with AttributeError on None below.
    body = soup.find('body')
    if body is None:
        body = soup

    # Extracting top languages
    lang_tags = body.find_all('a', class_='post-tag')
    error_checking(lang_tags, expected_count)  # Reports a size mismatch only
    languages = [tag.text for tag in lang_tags]

    # Extracting tag counts
    tag_counts = body.find_all('span', class_='item-multiplier-count')
    error_checking(tag_counts, expected_count)  # Reports a size mismatch only
    no_of_tags = [int(tag.text) for tag in tag_counts]

    # Putting the two lists together
    return pd.DataFrame({'Languages': languages, 'Tag Count': no_of_tags})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment