Skip to content

Instantly share code, notes, and snippets.

@rmuhamedgaliev
Created June 23, 2015 20:13
Show Gist options
  • Save rmuhamedgaliev/0c4ef2897ec29ba9b7fb to your computer and use it in GitHub Desktop.
Save rmuhamedgaliev/0c4ef2897ec29ba9b7fb to your computer and use it in GitHub Desktop.
Simple demo of an HTML parser for the page at http://снип.рф/snip/view/40
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'rmuhamedgaliev'
from bs4 import BeautifulSoup
import urllib2
def get_html_site():
    """Download the SNiP page and return its body.

    Returns:
        Whatever ``response.read()`` yields for the hard-coded URL; no
        decoding is applied here.

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``read`` is propagated
        unchanged (e.g. ``urllib2.URLError`` on connection failure).
    """
    # The host name is in its IDNA/punycode-encoded (ASCII) form.
    response = urllib2.urlopen('http://xn--h1ajhf.xn--p1ai/snip/view/40')
    try:
        return response.read()
    finally:
        # Release the underlying connection even if reading fails part-way.
        response.close()
if __name__ == '__main__':
    # Fetch the page and parse it. The downloaded markup is handed to
    # BeautifulSoup as-is.
    # NOTE(review): no parser is named, so bs4 picks one itself -- pin a
    # parser explicitly if the output must be identical across machines.
    site = get_html_site()
    soup = BeautifulSoup(site)
    mainDiv = soup.find('div', {'id': 'content'})

    # The first link inside the content div carries both values: its child
    # node 0 is used as the number and its child node 2 as the title, so
    # look the link up once and index into it.
    headerLink = mainDiv.find('a')
    # Get number
    number = headerLink.contents[0]
    # Get title
    title = headerLink.contents[2]

    # Get field of use
    # attrs must be a dict; a set literal here would be interpreted by bs4 as
    # a list of alternative class names rather than "class == 'odd'".
    filedOfUse = mainDiv.find('tr', {'class': 'odd'}).find('td').text

    # Get toc
    toc = mainDiv.find('div', {'class': 'toc'})
    # Process toc: drop the <br> separators so only the entries remain.
    for lineBreak in toc.find_all('br'):
        lineBreak.extract()

    print('\nTitle\n=============================================\n')
    print(title)
    print('\nNumber\n=============================================\n')
    print(number)
    print('\nToC\n=============================================\n')
    # Join the text nodes rather than the raw children: a child that is
    # itself a tag cannot be passed to str.join and would raise TypeError.
    print('\n'.join(toc.strings))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment