Skip to content

Instantly share code, notes, and snippets.

@parnexcodes
Created October 31, 2021 08:24
Show Gist options
  • Save parnexcodes/ce9a2fd90403488044e9def44a0d7109 to your computer and use it in GitHub Desktop.
Save parnexcodes/ce9a2fd90403488044e9def44a0d7109 to your computer and use it in GitHub Desktop.
asianembed recent episodes scraping
import requests
import pprint
from bs4 import BeautifulSoup
def get_recent(number):
    """Fetch one page of recent episodes from asianembed and pretty-print it.

    Args:
        number: Page number substituted into the ``?page=`` query parameter.

    Returns:
        A list of dicts, one per ``<li>`` found inside every
        ``<ul class="listing items">`` on the page, each with the keys
        ``posted``, ``title``, ``poster`` and ``id``. The same list is also
        pretty-printed to stdout, as the original script did.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    url = f"https://asianembed.com/?page={number}"
    # Without a timeout requests may block forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Fail loudly rather than parsing an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    result = []
    for listing in soup.find_all('ul', {'class': 'listing items'}):
        for entry in listing.find_all('li'):
            link = entry.a
            if link is None:
                # Not an episode entry (no anchor to read fields from).
                continue
            title = link.find('div', {'class': 'name'}).text.strip()
            posted = link.find('span', {'class': 'date'}).text
            poster = link.img['src']
            href = link.get('href')
            # Drop the first 8 characters of the href; presumably the
            # leading "/videos/" path prefix -- confirm against live markup.
            episode_id = href[8:]
            result.append({
                'posted': posted,
                'title': title,
                'poster': poster,
                'id': episode_id,
            })

    pprint.pprint(result)
    # pprint.pprint() returns None, so return the data itself to the caller.
    return result
# Only hit the network when executed as a script, not when imported.
if __name__ == "__main__":
    get_recent(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment