Skip to content

Instantly share code, notes, and snippets.

@yashpapa6969
Created March 25, 2023 16:44
Show Gist options
  • Save yashpapa6969/611dcaafca51977e4b039f0176fa0aef to your computer and use it in GitHub Desktop.
Save yashpapa6969/611dcaafca51977e4b039f0176fa0aef to your computer and use it in GitHub Desktop.
amith
import requests
from bs4 import BeautifulSoup
import csv
import datetime
import uuid as id
import sqlite3
# Scrape article cards from The Verge front page and store them in a
# per-day SQLite database named "<ddmmYYYY>_articles.db".

BASE_URL = 'https://www.theverge.com'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
# Exact class attribute of the <li> elements that wrap one article card.
CARD_CLASS = 'duet--content-cards--content-card group relative mx-auto flex max-w-container-sm flex-row border-b border-gray-31 bg-gray-13 text-white last-of-type:border-b-0 md:mx-0 md:max-w-full md:border-b-0 [&>div]:first-of-type:mt-0 [&>div]:first-of-type:pt-0 [&>div]:last-of-type:pb-0 [&>div]:last-of-type:md:border-b-0'
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def fetch_cards(url=BASE_URL + '/'):
    """Download *url* and return the article-card ``<li>`` elements found in it.

    Raises whatever ``requests`` raises on connection failure or timeout,
    and ``requests.HTTPError`` (via ``raise_for_status``) on a 4xx/5xx
    response, so an error page is never parsed as if it were the front page.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup.find_all('li', class_=CARD_CLASS)


def extract_article(card):
    """Return ``(url, headline, author)`` for one card, or ``None``.

    ``None`` is returned when the card has no ``<h2><a href=...>`` link, since
    both the URL and the headline come from that link. A card without a
    byline link is still returned, with ``author`` set to ``None``.
    """
    heading = card.h2
    link = heading.a if heading is not None else None
    if link is None or not link.get('href'):
        return None

    href = link['href']
    # Relative links get the site prefix; absolute links are kept as-is.
    if href.startswith(('http://', 'https://')):
        article_url = href
    else:
        article_url = BASE_URL + href

    byline = card.find('div', class_='inline-block')
    author_link = byline.a if byline is not None else None
    author = author_link.text.strip() if author_link is not None else None

    return article_url, link.text.strip(), author


def create_table(conn):
    """Create the ``article`` table on *conn* if it does not exist yet."""
    conn.execute('''CREATE TABLE IF NOT EXISTS article
(id INTEGER PRIMARY KEY,
url TEXT,
headline TEXT,
author TEXT);
''')


def store_articles(conn, articles):
    """Insert new ``(url, headline, author)`` rows and return their headlines.

    A row is skipped when its headline is already in the table or appeared
    earlier in *articles*, so running the script repeatedly on the same day
    does not duplicate rows. Rows are inserted in the order given. The caller
    is responsible for committing.
    """
    # Load the stored headlines once rather than querying per article.
    seen = {row[0] for row in conn.execute("select headline from article")}
    inserted = []
    for article_url, headline, author in articles:
        if headline in seen:
            continue
        conn.execute(
            "INSERT INTO article (url, headline, author) VALUES (?, ?, ?)",
            (article_url, headline, author))
        seen.add(headline)
        inserted.append(headline)
    return inserted


def main():
    """Scrape the front page, store new articles, and print the results.

    Prints the list of headlines inserted during this run, followed by
    every row currently in the ``article`` table.
    """
    cards = fetch_cards()
    articles = [a for a in map(extract_article, cards) if a is not None]

    filename = datetime.datetime.now().strftime("%d%m%Y") + "_articles.db"
    conn = sqlite3.connect(filename)
    try:
        create_table(conn)
        headlines = store_articles(conn, articles)
        print(headlines)
        conn.commit()
        result = conn.execute("SELECT * FROM article").fetchall()
        print(result)
    finally:
        # Close the database even if an insert or query fails.
        conn.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment