Skip to content

Instantly share code, notes, and snippets.

@tsundokul
Last active July 3, 2019 12:34
Show Gist options
  • Save tsundokul/8dcd93b63a6f2bee57a23ee2b2baf248 to your computer and use it in GitHub Desktop.
Save tsundokul/8dcd93b63a6f2bee57a23ee2b2baf248 to your computer and use it in GitHub Desktop.
Script that downloads the most popular WordPress plugins
#!/usr/bin/env python3
import requests
from lxml import html
# Make sure you have a directory named wp_plugins/ in the cwd
def get_links(url):
    """Download every plugin linked from one plugin-listing page.

    Fetches *url*, extracts the plugin page links from the listing markup,
    prints each link with its zero-based position on the page, and hands it
    to ``download_link``.

    Args:
        url: Address of the listing page to scan.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    page = requests.get(url, timeout=30)
    if page.status_code != 200:
        # Report the status instead of trying to parse an error page.
        print(page.status_code)
        return

    webpage = html.fromstring(page.content)
    links = webpage.xpath('//div/main/article//div[2]/header/h2/a/@href')
    for i, link in enumerate(links):
        print("{}. {}".format(i, link))
        download_link(link, str(i))
def download_link(link, i):
    """Download the file advertised by the "Download" anchor on a page.

    Fetches the page at *link*, takes the href of the first anchor whose text
    is exactly ``Download``, and streams that file into ``wp_plugins/`` under
    the last path segment of the download URL. If the page has no such
    anchor, a message is printed and nothing is downloaded. If the download
    request does not return HTTP 200, the status code is printed instead.

    Args:
        link: URL of the page that carries the download anchor.
        i: Position label supplied by the caller; accepted for compatibility
            with existing callers and not used here.
    """
    # Local import so the block works without touching file-level imports.
    import os

    # Timeouts keep one stalled connection from hanging the whole run.
    page = requests.get(link, timeout=30)
    webpage = html.fromstring(page.content)

    dl_links = webpage.xpath("//a[text()='Download']/@href")
    if not dl_links:
        # Skip this page rather than aborting the whole crawl on IndexError.
        print("No download link found on {}".format(link))
        return
    dl_link = dl_links[0]

    # Create the target directory on demand so a fresh checkout works.
    os.makedirs("wp_plugins", exist_ok=True)
    name = "wp_plugins/" + dl_link.split("/")[-1]

    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0",
        'Referer': link,
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    }

    # The context manager releases the streamed connection on every path.
    with requests.get(dl_link, stream=True, headers=headers, timeout=30) as r:
        if r.status_code == 200:
            with open(name, 'wb') as f:
                # Explicit chunk size; iterating the response directly yields
                # 128-byte pieces, which means many tiny writes.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        else:
            print(r.status_code)
# Walk listing pages 1 through 99 of the "popular" plugin index and download
# every plugin linked from each one.
listing_prefix = 'https://wordpress.org/plugins/browse/popular/page/'
for page_number in range(1, 100):
    banner = "========== PAGE {}".format(page_number)
    print(banner)
    get_links(listing_prefix + str(page_number))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment