Skip to content

Instantly share code, notes, and snippets.

@ksv-muralidhar
Last active August 23, 2021 08:55
Show Gist options
  • Save ksv-muralidhar/a78830a9a2b75ecb71eb79599a391c6a to your computer and use it in GitHub Desktop.
Save ksv-muralidhar/a78830a9a2b75ecb71eb79599a391c6a to your computer and use it in GitHub Desktop.
bing_streamlit
import html

import pandas as pd
import requests as r
import streamlit as st
from bs4 import BeautifulSoup
# Streamlit page that sends the text typed by the user to Bing, scrapes the
# organic results (<li class="b_algo"> elements) from the returned page and
# shows them both as an HTML list and as a pandas data frame.

_BING_SEARCH_URL = "https://www.bing.com/search"
_REQUEST_HEADERS = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"}
_REQUEST_TIMEOUT_SECONDS = 10  # avoid hanging the app forever on a stalled connection
_RESULT_COLUMNS = ["Title", "URL", "Description"]


def _extract_results(page_content):
    """Return one ``{"Title", "URL", "Description"}`` dict per Bing result.

    ``page_content`` is the raw body of the search response. Results that
    lack an ``<h2>`` heading, a link inside it, or an ``href`` on that link
    are skipped so a single malformed entry does not discard the others.
    A result without a ``<p>`` element gets an empty description.
    """
    soup = BeautifulSoup(page_content, features="html.parser")
    rows = []
    # 'b_algo' is the class of the list item that represents a single result.
    for item in soup.find_all("li", class_="b_algo"):
        heading = item.find("h2")
        link = heading.find("a") if heading is not None else None
        url = link.get("href") if link is not None else None
        if not url:
            continue
        paragraph = item.find("p")
        rows.append({
            # .text already drops nested tags such as <strong>.
            "Title": link.text,
            "URL": url,
            "Description": "" if paragraph is None else paragraph.text,
        })
    return rows


def _render_results_html(rows):
    """Build the HTML table that lists ``rows`` (as made by ``_extract_results``).

    Every scraped value is HTML-escaped because the markup is rendered with
    ``unsafe_allow_html=True``.
    """
    parts = ['<html><table style="border: none;">']
    for row in rows:
        url = row["URL"]
        # Displayed citation: the first 50 characters of the URL, then "...".
        cite = f'{url[:50]}...' if len(url) >= 50 else url
        parts.append(
            f'<tr style="border: none;"><h3><a href="{html.escape(url, quote=True)}" target="_blank">{html.escape(row["Title"])}</a></h3></tr>'
            f'<tr style="border: none;"><strong style="color:green;">{html.escape(cite)}</strong></tr>'
            f'<tr style="border: none;">{html.escape(row["Description"])}</tr>'
            '<tr style="border: none;"><td style="border: none;"></td></tr>'
        )
    parts.append('</table></html>')
    return "".join(parts)


st.markdown('<h1 style="background-color: gainsboro; padding-left: 10px; padding-bottom: 20px;">Search Engine Scraper</h1>', unsafe_allow_html=True)
query = st.text_input('', help='Enter the search string and hit Enter/Return')
query = query.strip()  # a whitespace-only entry is treated as "no query"

if query:  # runs once the user has hit Enter/Return in the search text box
    try:
        # Passing the query through ``params`` lets requests URL-encode it
        # (spaces, '&', '#', '+', ...), instead of splicing it into the URL.
        req = r.get(
            _BING_SEARCH_URL,
            params={"q": query},
            headers=_REQUEST_HEADERS,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except r.RequestException:
        # Network-level failure (DNS, connection, timeout, ...): fall through
        # to the same error display as a non-200 response.
        req = None

    if req is not None and req.status_code == 200:  # 200 indicates a successful request
        rows = _extract_results(req.content)
        # Build the frame once from all rows; DataFrame.append was removed in
        # pandas 2.0 and grew the frame quadratically before that.
        result_df = pd.DataFrame(rows, columns=_RESULT_COLUMNS)
        result_str = _render_results_html(rows)
        # Computed after the loop so it is defined even when there are no results.
        count_str = f'<b style="font-size:20px;">Bing Search returned {len(result_df)} results</b>'
    else:
        # Request failed or returned a non-200 status: show an error message
        # along with a single blank row.
        result_df = pd.DataFrame({"Title": "", "URL": "", "Description": ""}, index=[0])
        result_str = '<html></html>'
        count_str = '<b style="font-size:20px;">Looks like an error!!</b>'

    st.markdown(f'{count_str}', unsafe_allow_html=True)
    st.markdown(f'{result_str}', unsafe_allow_html=True)
    st.markdown('<h3>Data Frame of the above search result</h3>', unsafe_allow_html=True)
    st.dataframe(result_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment