# nveenverma/block26.python Secret

## block26.python
def get_top_questions(url, question_count):
    """Scrape the top-voted questions listed at *url* into a DataFrame.

    The page is requested with ``?sort=votes&pagesize=<question_count>``
    appended to *url*, parsed with BeautifulSoup, and each per-question
    field is pulled out with a CSS selector.  Every selector result is
    passed to ``error_checking`` together with *question_count* before it
    is used.

    Args:
        url: Base URL of the questions listing (without a query string,
            since one is appended here).
        question_count: Page size to request.  WARNING: only enter one of
            these 3 values [15, 30, 50], since stackoverflow doesn't
            display any other size questions list.

    Returns:
        A pandas DataFrame with the columns ``question``, ``summary``,
        ``tags`` (a list of tag names per row), ``no_of_votes``,
        ``no_of_answers`` and ``no_of_views``.
    """
    url = url + "?sort=votes&pagesize={}".format(question_count)

    # Using requests module for downloading webpage content
    response = requests.get(url)

    # Parsing html data using BeautifulSoup
    soup = bs(response.content, 'html.parser')
    body = soup.find('body')

    # NOTE: every lookup below goes through `body`, the element parsed just
    # above.  The earlier code referenced an undefined name `body1`, which
    # raised NameError (or silently read a stale global in a notebook).

    # Extracting Top Questions
    question_links = body.select("h3 a.question-hyperlink")
    error_checking(question_links, question_count)
    questions = [link.text for link in question_links]

    # Extracting Summary
    summary_divs = body.select("div.excerpt")
    error_checking(summary_divs, question_count)
    summaries = [div.text.strip() for div in summary_divs]

    # Extracting Tags: one list of tag names per question
    tags_divs = body.select("div.summary > div:nth-of-type(2)")
    error_checking(tags_divs, question_count)
    tags = [[a.text for a in div.select('a')] for div in tags_divs]

    # Extracting Number of votes
    vote_spans = body.select("span.vote-count-post strong")
    error_checking(vote_spans, question_count)
    no_of_votes = [int(span.text) for span in vote_spans]

    # Extracting Number of answers
    answer_divs = body.select("div.status strong")
    error_checking(answer_divs, question_count)
    no_of_answers = [int(div.text) for div in answer_divs]

    # Extracting Number of views: the count lives in the `title` attribute;
    # drop its trailing six characters (presumably the " views" suffix) and
    # the thousands separators before converting to int.
    div_views = body.select("div.supernova")
    error_checking(div_views, question_count)
    no_of_views = [int(div['title'][:-6].replace(',', '')) for div in div_views]

    # Putting all of them together
    df = pd.DataFrame({'question': questions,
                       'summary': summaries,
                       'tags': tags,
                       'no_of_votes': no_of_votes,
                       'no_of_answers': no_of_answers,
                       'no_of_views': no_of_views})

    return df
	def get_top_questions(url, question_count):
		"""Return a DataFrame of the top-voted questions listed at *url*.

		``?sort=votes&pagesize=<question_count>`` is appended to *url*, the
		page is downloaded and parsed, and each field is extracted with a CSS
		selector whose result is handed to ``error_checking`` along with
		*question_count* before use.

		WARNING: Only enter one of these 3 values [15, 30, 50] for
		*question_count*, since stackoverflow doesn't display any other
		size questions list.

		The returned DataFrame has the columns ``question``, ``summary``,
		``tags``, ``no_of_votes``, ``no_of_answers`` and ``no_of_views``.
		"""
		url = url + "?sort=votes&pagesize={}".format(question_count)

		# Using requests module for downloading webpage content
		response = requests.get(url)

		# Parsing html data using BeautifulSoup
		soup = bs(response.content, 'html.parser')
		# All selectors below run against this element; the previous text
		# used an undefined `body1` here, which fails with NameError.
		body = soup.find('body')

		# Extracting Top Questions
		question_links = body.select("h3 a.question-hyperlink")
		error_checking(question_links, question_count)  # Error Checking
		questions = [i.text for i in question_links]  # questions list

		# Extracting Summary
		summary_divs = body.select("div.excerpt")
		error_checking(summary_divs, question_count)  # Error Checking
		summaries = [i.text.strip() for i in summary_divs]  # summaries list

		# Extracting Tags
		tags_divs = body.select("div.summary > div:nth-of-type(2)")
		error_checking(tags_divs, question_count)  # Error Checking
		a_tags_list = [i.select('a') for i in tags_divs]  # tag links

		tags = []
		for a_group in a_tags_list:
			# One list of tag names per question
			tags.append([a.text for a in a_group])

		# Extracting Number of votes
		vote_spans = body.select("span.vote-count-post strong")
		error_checking(vote_spans, question_count)  # Error Checking
		no_of_votes = [int(i.text) for i in vote_spans]  # votes list

		# Extracting Number of answers
		answer_divs = body.select("div.status strong")
		error_checking(answer_divs, question_count)  # Error Checking
		no_of_answers = [int(i.text) for i in answer_divs]  # answers list

		# Extracting Number of views
		div_views = body.select("div.supernova")
		error_checking(div_views, question_count)  # Error Checking
		no_of_views = [i['title'] for i in div_views]
		# Drop the trailing six characters and the thousands separators so
		# the remaining text converts cleanly to int.
		no_of_views = [i[:-6].replace(',', '') for i in no_of_views]
		no_of_views = [int(i) for i in no_of_views]  # views list

		# Putting all of them together
		df = pd.DataFrame({
			'question': questions,
			'summary': summaries,
			'tags': tags,
			'no_of_votes': no_of_votes,
			'no_of_answers': no_of_answers,
			'no_of_views': no_of_views,
		})

		return df