Skip to content

Instantly share code, notes, and snippets.

@iwalfy
Created December 17, 2022 14:09
Show Gist options
  • Save iwalfy/d7852b01803347d82ccb2cdc23b4d3d9 to your computer and use it in GitHub Desktop.
Save iwalfy/d7852b01803347d82ccb2cdc23b4d3d9 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
from pyquery import PyQuery
# Inclusive bounds of the article numbers that main() requests, one by one.
# LAST_ARTICLE is also the denominator of the progress percentage it prints.
FIRST_ARTICLE=1
LAST_ARTICLE=58340
def main():
    """Append the first ``#r_memo`` paragraph of each article to opennet.txt.

    Requests every article number from FIRST_ARTICLE to LAST_ARTICLE
    (inclusive), extracts the text matched by the CSS selector
    ``#r_memo > p:nth-child(1)`` and, when it is non-empty, appends it as
    one line to ``opennet.txt`` in the current working directory.

    Articles whose request fails (connection error, timeout, ...) are
    reported on stderr and skipped so that a single transient failure does
    not abort the whole run.
    """
    import sys  # local import: only needed for reporting skipped articles

    # Seconds to wait for the server before giving up on one article;
    # without a timeout a stalled connection would block forever.
    request_timeout = 30

    # ``with`` guarantees the output file (and the HTTP session) are closed
    # even if an unexpected exception escapes the loop. An explicit UTF-8
    # encoding keeps non-ASCII article text writable regardless of locale.
    with open("opennet.txt", "a", encoding="utf-8") as f, \
            requests.Session() as session:
        for article in range(FIRST_ARTICLE, LAST_ARTICLE + 1):
            url = "https://www.opennet.ru/opennews/art.shtml?num={}".format(article)
            try:
                # The session reuses the underlying connection across requests.
                r = session.get(url, timeout=request_timeout)
            except requests.RequestException as exc:
                # Leading newline keeps the message off the "\r" progress line.
                print(
                    "\nSkipping article {}: {}".format(article, exc),
                    file=sys.stderr,
                )
                continue

            text = PyQuery(r.text)("#r_memo > p:nth-child(1)").text()
            if not text:
                # Nothing matched the selector for this article number.
                continue

            percentage = round((article / LAST_ARTICLE) * 100, 2)
            print(" Working on {} of {}... {}%".format(article, LAST_ARTICLE, percentage), end="\r")
            f.write("{}\n".format(text))
            # Flush per article so an interrupted run keeps what it collected.
            f.flush()
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment