Skip to content

Instantly share code, notes, and snippets.

@stranger777
Last active May 24, 2024 16:45
Show Gist options
  • Save stranger777/db2c91f7ee2ff1932da8a66c4bf52ef2 to your computer and use it in GitHub Desktop.
Save stranger777/db2c91f7ee2ff1932da8a66c4bf52ef2 to your computer and use it in GitHub Desktop.
import os
from flask import Flask, request, Response
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from feedgen.feed import FeedGenerator
# Flask application object; the @app.route decorator further down registers
# the /rss view on it, and app.run() serves it when run as a script.
app = Flask(__name__)
def generate_rss_feed(url, title_selector, link_selector, description_selector):
    """Load *url* in headless Chrome and build an RSS feed from the rendered page.

    The elements matched by the three CSS selectors are paired positionally
    with ``zip``, so the feed contains as many entries as the shortest of the
    three result lists.

    Args:
        url: Address of the page to open in the browser.
        title_selector: CSS selector for the elements whose text becomes the
            entry titles.
        link_selector: CSS selector for the elements whose ``href`` attribute
            becomes the entry links. An entry whose element has no ``href``
            is emitted without a link.
        description_selector: CSS selector for the elements whose text
            becomes the entry descriptions.

    Returns:
        The serialized feed as returned by ``FeedGenerator.rss_str(pretty=True)``.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urljoin

    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # The driver binary is expected to sit next to this source file.
    service = Service(os.path.join(os.path.dirname(__file__), "chromedriver.exe"))

    # Do not call service.start() here: webdriver.Chrome starts the service it
    # is given, and starting it twice can leave a stray chromedriver process.
    with webdriver.Chrome(service=service, options=chrome_options) as driver:
        driver.get(url)
        WebDriverWait(driver, 30).until(
            EC.visibility_of_element_located((By.TAG_NAME, 'html'))
        )
        # Capture everything needed from the browser, then let the context
        # manager close it before the (browser-independent) parsing starts.
        page_source = driver.page_source
        base_url = driver.current_url or url

    soup = BeautifulSoup(page_source, 'html.parser')

    fg = FeedGenerator()
    fg.title('RSS Feed')
    fg.link(href=url, rel='alternate')
    fg.description('RSS feed generated using Selenium and Flask')

    matched = zip(
        soup.select(title_selector),
        soup.select(link_selector),
        soup.select(description_selector),
    )
    for title, link, description in matched:
        fe = fg.add_entry()
        fe.title(title.get_text())
        # .get() avoids a KeyError when the matched element has no href.
        href = link.get('href')
        if href:
            # Feed readers need absolute links; resolve relative ones against
            # the address the browser actually ended up on.
            fe.link(href=urljoin(base_url, href))
        fe.description(description.get_text())

    return fg.rss_str(pretty=True)
@app.route('/rss')
def rss():
    """Serve an RSS feed built from the page and selectors given in the query string.

    Expects the query parameters ``url``, ``title_selector``,
    ``link_selector`` and ``description_selector``. Any other query
    parameters are ignored.

    Returns:
        A ``text/xml`` response with the generated feed, or a plain-text
        message with status 400 when a required parameter is missing or
        empty, or when ``url`` is not an http(s) address.
    """
    required = ('url', 'title_selector', 'link_selector', 'description_selector')
    # Pick out only the expected keys: unpacking request.args directly would
    # forward unrelated parameters and fail with a TypeError.
    params = {key: request.args.get(key) for key in required}
    if not all(params.values()):
        return "Необходимо указать URL и селекторы для заголовка, ссылки и описания.", 400

    # The URL is untrusted and is opened in a real browser; refuse schemes such
    # as file:// that would expose local resources.
    # NOTE(review): this does not stop requests to internal http(s) hosts.
    if not params['url'].lower().startswith(('http://', 'https://')):
        return "URL должен начинаться с http:// или https://.", 400

    return Response(generate_rss_feed(**params), mimetype='text/xml')
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly.
    # NOTE: debug=True turns on the reloader and the interactive debugger and
    # is meant for local development only.
    app.run(debug=True)
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>RSS Feed Generator</title>
</head>
<body>
  <!-- Sends the fields as a GET query string to the local /rss endpoint;
       each field name is the query parameter that endpoint reads. -->
  <form action="http://localhost:5000/rss" method="GET">
    <label for="url">URL:</label>
    <!-- type="url" makes the browser reject values that are not absolute
         URLs before the request is sent. -->
    <input type="url" id="url" name="url" required>
    <br>
    <label for="title_selector">Title Selector:</label>
    <input type="text" id="title_selector" name="title_selector" required>
    <br>
    <label for="link_selector">Link Selector:</label>
    <input type="text" id="link_selector" name="link_selector" required>
    <br>
    <label for="description_selector">Description Selector:</label>
    <input type="text" id="description_selector" name="description_selector" required>
    <br>
    <button type="submit">Generate RSS Feed</button>
  </form>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment