Skip to content

Instantly share code, notes, and snippets.

@PandaWhoCodes
Created December 11, 2016 10:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PandaWhoCodes/531af2b5872f257af3c7c9ee535c6821 to your computer and use it in GitHub Desktop.
Save PandaWhoCodes/531af2b5872f257af3c7c9ee535c6821 to your computer and use it in GitHub Desktop.
Getting the RSS feed link from a simple Google search
import requests
from bs4 import BeautifulSoup
def get_rss_feed(website_url):
    """Print and return the RSS feed links advertised by a web page.

    Downloads ``website_url`` and scans it for ``<link>`` elements whose
    ``type`` attribute is ``application/rss+xml``, the MIME type pages use
    to advertise an RSS feed.

    Args:
        website_url: Address of the page to inspect. ``None`` is rejected
            with a printed message instead of a request.

    Returns:
        A list with the ``href`` of every matching ``<link>`` element that
        has one. The list is empty when ``website_url`` is ``None`` or the
        page advertises no feed.
    """
    if website_url is None:
        print("URL should not be null")
        return []

    # Without a timeout an unresponsive server blocks the call forever.
    response = requests.get(website_url, timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    feeds = []
    for link in soup.find_all("link", {"type": "application/rss+xml"}):
        href = link.get("href")
        if href is None:
            # A <link> without a target is of no use as a feed address.
            continue
        print("RSS feed for " + website_url + " is --> " + str(href))
        feeds.append(href)
    return feeds
import pprint
from googleapiclient.discovery import build
# Developer key handed to google_search by parseArticle.
# NOTE(review): "APIKEY" looks like a placeholder — supply a real key.
my_api_key = "APIKEY"
# Custom search engine id handed to google_search (sent as `cx`).
# NOTE(review): presumably a placeholder as well — confirm before running.
my_cse_id = "CUSTOM SEARCH ID"
def google_search(search_term, api_key, cse_id, **kwargs):
    """Query a Google Custom Search engine and return its result items.

    Args:
        search_term: Query string, sent as ``q``.
        api_key: Developer key used to build the API client.
        cse_id: Custom search engine id, sent as ``cx``.
        **kwargs: Extra parameters forwarded unchanged to ``cse().list``
            (for example ``num``).

    Returns:
        The list stored under ``"items"`` in the response, or an empty list
        when the response has no such key.
    """
    service = build("customsearch", "v1", developerKey=api_key)
    res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()
    # A query with no hits comes back without an "items" key; indexing it
    # directly would raise KeyError, so fall back to an empty list.
    return res.get("items", [])
def parseArticle(myquery, site):
    """Search for ``myquery`` on ``site`` and print the hits from that site.

    Runs ``google_search`` with the query ``"<site> <myquery>"`` (at most
    four results), prints the raw result list, then prints the link and
    HTML title of every result whose link contains ``site``.

    Args:
        myquery: Search terms to look for.
        site: Domain text that a result's link must contain to be printed.
    """
    results = google_search(site + " " + myquery, my_api_key, my_cse_id, num=4)
    print(results)
    for item in results:
        # 'link' and 'htmlTitle' are plain strings: compare and print them
        # whole rather than indexing [0], which yields only the first
        # character and makes the site filter never match.
        link = item['link']
        if site in link:
            print(link)
            print(item['htmlTitle'])
# Example invocation, executed whenever this module runs or is imported:
# searches for "windows 10" with the site text "ashtricks.com".
parseArticle("windows 10","ashtricks.com")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment