# dsaiztc/beautifulsoup.py

## beautifulsoup.py
import re

from bs4 import BeautifulSoup

# Beautiful Soup quick reference.
# `html_doc` must already hold the markup to parse (for example a string of
# HTML); this cheat sheet does not define it. `kind_of_tag`, `attribute_name`
# and `tag-name` are placeholders for real tag / attribute names.
soup = BeautifulSoup(html_doc, 'html.parser')
print(soup.prettify())

# Dotted access returns the first tag with that name, or None if there is none.
tag = soup.kind_of_tag
tag.name  # name of the tag
# .string is the tag's only string; it is None when the tag contains more than
# one thing (use tag.get_text() to collect all the text instead).
tag.string
tag.attrs  # attributes of the tag as a dictionary
tag['attribute_name']  # value of one attribute; raises KeyError if it is missing
tag.get('attribute_name')  # same lookup, but returns None if it is missing
# NOTE: tag.attribute_name is NOT an attribute lookup -- dotted access on a tag
# searches for the first descendant tag with that name.

# search for tags
soup.find_all('tag-name')  # find all 'tag-name' tags and return a list of them
for link in soup.find_all('a'):
    print(link.get('href'))

# search for tags (by their attributes)
soup.find(id='my_id')
soup.find(id=True)  # first tag that has an id attribute, whatever its value
# A compiled regex is matched with search(); the dot is escaped so that only
# hrefs ending in a literal ".es" match (an unescaped "." matches any character).
soup.find_all(href=re.compile(r'\.es$'))
soup.find_all(attrs={'data-foo': 'value'})
soup.find_all('a', attrs={'class': 'sister'})
# search by CSS class: <p class="title"><b>The Dormouse's story</b></p>
soup.find_all(class_=re.compile('title'))
# <a class="sister" href="http://example.com/elsie" id="link1">Elsie</a>
soup.find_all('a', class_='sister')
# Any keyword argument that's not recognized will be turned into a filter on
# one of a tag's attributes.
# Some attributes, like the data-* attributes in HTML 5, have names that can't
# be used as the names of keyword arguments; pass those through attrs={...}.

# search by string content (`string` is the current name of the older `text`
# argument)
soup.find_all('a', string='Elsie')  # <a> tags whose .string is 'Elsie'
soup.find_all(string='Elsie')  # the matching strings themselves, not tags