Created
November 1, 2016 13:25
-
-
Save ahmedash95/15f747f431f4c38a5884badd7e1c1647 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import re
import urllib2
from contextlib import closing

import requests
from bs4 import BeautifulSoup
class url_grabber:
    """Download a web page and print selected pieces of it to stdout.

    Typical use is ``url_grabber(url).run()``. ``get_content`` must be
    called (directly or through ``run``) before any other ``get_*`` method,
    because those methods read the parsed document from ``self.soup``.
    """

    def __init__(self, url):
        """Remember *url* and print a notice that it is being processed."""
        self.url = url
        print("%s :cloning" % (url))

    def run(self, element=None):
        """Fetch the page, then print its title, keywords and og:image.

        Args:
            element: optional CSS selector. When given, the text of the first
                matching element is printed as well. When omitted, that step
                is skipped (``get_element_content`` cannot be called without
                a selector).
        """
        self.get_content()
        self.get_title()
        self.get_keywords()
        self.get_image()
        if element is not None:
            self.get_element_content(element)

    def get_content(self):
        """Download ``self.url`` and store the parsed document in ``self.soup``."""
        # closing() guarantees the HTTP response is released even if read()
        # raises; the object returned by urllib2.urlopen is not itself a
        # context manager.
        with closing(urllib2.urlopen(self.url)) as response:
            html = response.read()
        self.soup = BeautifulSoup(html, "lxml")

    def get_title(self):
        """Print the text of the page's ``<title>`` tag, UTF-8 encoded.

        Raises:
            AttributeError: if the document has no ``<title>`` tag.
        """
        # Encode like get_keywords/get_image do, so printing a non-ASCII title
        # does not depend on the encoding of stdout.
        title = self.soup.find('title').text.encode('utf-8')
        print(title)

    def get_keywords(self):
        """Print the ``content`` of the first tag with ``name="keywords"``.

        Raises:
            IndexError: if no such tag exists in the document.
        """
        matches = self.soup.findAll(attrs={"name": "keywords"})
        keywords = matches[0]['content'].encode('utf-8')
        print(keywords)

    def get_image(self):
        """Print the ``content`` of the first tag with ``property="og:image"``.

        Raises:
            IndexError: if no such tag exists in the document.
        """
        matches = self.soup.findAll(attrs={"property": "og:image"})
        image = matches[0]['content'].encode('utf-8')
        print(image)

    def get_element_content(self, element):
        """Print the text of the first element matching CSS selector *element*.

        Raises:
            IndexError: if the selector matches nothing.
        """
        # Encoded to UTF-8 for the same reason as in get_title.
        element_content = self.soup.select(element)[0].text.encode('utf-8')
        print(element_content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment