Skip to content

Instantly share code, notes, and snippets.

@ahmedash95
Created November 1, 2016 13:25
Show Gist options
  • Save ahmedash95/15f747f431f4c38a5884badd7e1c1647 to your computer and use it in GitHub Desktop.
Save ahmedash95/15f747f431f4c38a5884badd7e1c1647 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import requests
import re
import urllib2
from bs4 import BeautifulSoup
class url_grabber:
    """Download a web page and print selected pieces of its content.

    The page is fetched with ``urllib2`` and parsed with BeautifulSoup using
    the ``lxml`` parser. The ``get_*`` methods print the page title, the
    ``keywords`` meta tag, the ``og:image`` meta tag, or the text of the
    first element matching a CSS selector.

    Every ``print`` is written with a single parenthesised argument, which
    the Python 2 print statement treats exactly like the bare form.
    """

    def __init__(self, url):
        """Remember ``url`` and announce it on stdout."""
        self.url = url
        print("%s :cloning" % (url))

    def run(self, element=None):
        """Fetch the page and print its title, keywords and og:image.

        Args:
            element: optional CSS selector. When given, the text of the
                first matching element is printed as well. The default
                ``None`` skips that step; ``get_element_content`` cannot be
                called without a selector.
        """
        self.get_content()
        self.get_title()
        self.get_keywords()
        self.get_image()
        if element is not None:
            self.get_element_content(element)

    def get_content(self):
        """Download ``self.url`` and store the parsed document in ``self.soup``."""
        response = urllib2.urlopen(self.url)
        try:
            self.soup = BeautifulSoup(response.read(), "lxml")
        finally:
            # Release the connection even when reading or parsing fails.
            response.close()

    def _meta_content(self, attr_name, attr_value):
        """Return the UTF-8 encoded ``content`` of the first matching tag.

        Raises:
            IndexError: no tag has ``attr_name`` equal to ``attr_value``.
            KeyError: the matching tag has no ``content`` attribute.
        """
        matches = self.soup.findAll(attrs={attr_name: attr_value})
        # Encoded to bytes before printing -- presumably so that non-ASCII
        # text prints safely under Python 2; confirm before porting.
        return matches[0]['content'].encode('utf-8')

    def get_title(self):
        """Print the text of the page's ``<title>`` tag.

        Raises:
            AttributeError: the document has no ``<title>`` tag.
        """
        title = self.soup.find('title').text
        print(title)

    def get_keywords(self):
        """Print the ``content`` of the first ``name="keywords"`` tag."""
        keywords = self._meta_content("name", "keywords")
        print(keywords)

    def get_image(self):
        """Print the ``content`` of the first ``property="og:image"`` tag."""
        image = self._meta_content("property", "og:image")
        print(image)

    def get_element_content(self, element):
        """Print the text of the first element matching CSS selector ``element``.

        Raises:
            IndexError: nothing in the document matches ``element``.
        """
        element_content = self.soup.select(element)[0].text
        print(element_content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment