Created
October 8, 2021 09:42
-
-
Save Hammer2900/1cbe1a88d8fedd9620bb9b6aae53ad7e to your computer and use it in GitHub Desktop.
Bot for evaluating products in a store rztk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import requests | |
import telebot | |
from box import Box | |
from meiga import Result, Error, Failure | |
from rich.console import Console | |
from rich.progress import track | |
from rich.table import Table | |
# Telegram bot token, read from the environment (None when the variable is unset).
RZTK_BOT_KEY = os.environ.get('RZTK_BOT_KEY')
# File the rendered HTML report is written to before it is sent to the chat.
RZTK_TMP_FILE_PATH = os.environ.get('RZTK_TMP_FILE_PATH', '/tmp/out.html')
# Single shared bot instance; handlers below register themselves on it.
bot = telebot.TeleBot(RZTK_BOT_KEY, parse_mode=None)
# Generic, case-insensitive URL shape check used by check_url(); it is anchored
# at both ends, so the whole string has to look like one URL.
regex = re.compile(
    r'^(?:http|ftp)s?://'  # scheme: http(s):// or ftp(s)://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$',  # optional path/query with no whitespace, up to end of string
    re.IGNORECASE,
)
def parse_rztk_comments(page: int = 1, goods: int = 224125939, timeout: float = 10.0) -> Result[Box, Error]:
    """Fetch one page of buyer comments for a product from the Rozetka product API.

    Args:
        page: Page number to request (callers in this file start at 1).
        goods: Numeric product id the comments belong to.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A successful ``Result`` wrapping the JSON payload as a ``Box`` when the
        response is HTTP 200 and has a top-level ``data`` key; otherwise a
        ``Failure`` wrapping the exception that describes what went wrong.
    """
    url = 'https://product-api.rozetka.com.ua/v4/comments/get'
    params = {
        'front-type': 'xl',
        'country': 'UA',
        'lang': 'ru',
        'goods': goods,
        'page': page,
        'sort': 'from_buyer',
        'type': 'comment',
        'limit': 10,
    }
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'User-Agent': (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)'
            ' Chrome/91.0.4472.114 Safari/537.36'
        ),
        'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
        'Origin': 'https://rozetka.com.ua',
    }
    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        if response.status_code != 200:
            raise ValueError('Status code error')
        new_box = Box(response.json())
        if not hasattr(new_box, 'data'):
            raise ValueError('Data in box not valid')
        return Result(success=new_box)
    except Exception as e:
        # Deliberately broad: this function is the boundary that converts any
        # network, decoding or validation error into a Failure for the caller.
        return Failure(e)
def check_url(url: str) -> Result[str, Error]:
    """Validate that ``url`` is a well-formed link to a supported Rozetka host.

    The text must match the module-level URL ``regex`` AND start with one of
    the accepted site prefixes. The previous ``a and b or c or d`` condition
    applied the regex only to the first prefix because ``and`` binds tighter
    than ``or``.

    Args:
        url: Raw text received from the user.

    Returns:
        A successful ``Result`` with the unchanged url, or a ``Failure``
        wrapping ``Error('Url not match')``.
    """
    allowed_prefixes = (
        'https://hard.rozetka.com.ua/',
        'https://rozetka.com.ua/',
        'https://bt.rozetka.com.ua/',
    )
    # The isinstance guard keeps non-text input from raising inside re/startswith.
    if isinstance(url, str) and regex.match(url) and url.startswith(allowed_prefixes):
        return Result(success=url)
    return Failure(Error('Url not match'))
def search_re_group_url(url: str) -> Result[int, Error]:
    """Extract the numeric product id from a ``.../p<digits>/`` url segment.

    Args:
        url: Product page url.

    Returns:
        A successful ``Result`` with the id as ``int``, or a ``Failure``
        wrapping a ``ValueError`` when the url has no such segment or is not
        a string.
    """
    try:
        # Raw string: the old '\/p(\d+)\/' literal relied on invalid escape
        # sequences, which newer Python versions warn about.
        found = re.search(r'/p(\d+)/', url)
    except TypeError as e:
        return Failure(ValueError(e))
    if found is None:
        # Report the real problem instead of a leaked NoneType AttributeError.
        return Failure(ValueError(f'Product id not found in url: {url}'))
    return Result(success=int(found.group(1)))
def struct_table() -> Table:
    """Build the borderless, header-less two-column table used as page layout."""
    layout = Table(show_header=False, show_edge=False)
    for _ in range(2):
        layout.add_column()
    return layout
def star_table(box_item) -> Table:
    """Build the 'Stars' table with one row per mark, from 5 stars down to 1.

    Counts are read from ``box_item.data.total_comments.comment_count_marks_<n>``.
    """
    stars = Table(title='Stars', show_header=True)
    stars.add_column('Star', justify='right', style='cyan', no_wrap=True)
    stars.add_column('Count', style='magenta')
    for mark in range(5, 0, -1):
        count = getattr(box_item.data.total_comments, f'comment_count_marks_{mark}')
        stars.add_row(f'Star {mark}', str(count))
    return stars
def percent_table(url: str, percent_up: int, percent_down: int) -> Table:
    """Build the empty comments table whose title shows the url and good/bad split."""
    heading = f'Url: {url} Good/bad: [{percent_up}% <---> {percent_down}%]'
    comments = Table(title=heading)
    comments.add_column('Name', justify='right', style='cyan', no_wrap=True)
    for column_name in ('Date', 'Comment', 'Star'):
        comments.add_column(column_name, style='magenta')
    return comments
def _rating_percents(totals):
    """Return ``(good, bad)`` integer percentages from per-star comment counts.

    Only 5-star marks count as good; marks 1-4 count as bad. Both values are 0
    when there are no marks at all, which avoids dividing by zero.
    """
    good_count = totals.comment_count_marks_5
    bad_count = sum(getattr(totals, f'comment_count_marks_{mark}') for mark in range(1, 5))
    all_count = good_count + bad_count
    if not all_count:
        return 0, 0
    return int((good_count / all_count) * 100), int((bad_count / all_count) * 100)


@bot.message_handler(func=lambda m: True)
def echo_all(message):
    """
    Find and parse info for rztk market.

    Treats the message text as a product url, downloads every page of its
    comments, renders them next to the star statistics as an HTML report and
    sends that report back to the chat as a document. Any validation or
    download problem is reported to the user as an ``Error: ...`` reply.
    """
    url = check_url(message.text)
    if url.is_failure:
        bot.reply_to(message, f'Error: {url.value}')
        return
    url = url.unwrap()

    group = search_re_group_url(url)
    if group.is_failure:
        bot.reply_to(message, f'Error: {group.value}')
        return
    item_id = group.unwrap()
    bot.reply_to(message, f'Working on item id: {item_id}')

    # The first page carries the totals, the page count and the product title.
    response_first = parse_rztk_comments(1, item_id)
    if response_first.is_failure:
        bot.reply_to(message, f'Error: {response_first.value}')
        return
    response_first = response_first.unwrap()

    structure_table = struct_table()
    stars_table = star_table(response_first)
    page_size = response_first.data.pages.count
    title = response_first.data.record.fulltitle
    percent_up, percent_down = _rating_percents(response_first.data.total_comments)
    percents_table = percent_table(url, percent_up, percent_down)

    for page1 in track(range(1, page_size + 1), description='Parsing...'):
        response = parse_rztk_comments(page1, item_id)
        if response.is_failure:
            # Best effort: report the failed page and keep collecting the rest.
            bot.reply_to(message, f'Error: {response.value}')
            continue
        for comment in response.unwrap().data.comments:
            percents_table.add_row(
                str(comment.usertitle),
                str(comment.created.pop_date),
                str(comment.shortcomings),
                str(comment.mark),
            )

    structure_table.add_row(percents_table, stars_table)
    # record=True lets the console export everything it printed as HTML.
    console = Console(width=150, record=True)
    console.rule(f'[bold blue]{title}')
    console.print(structure_table, justify='center')

    # Explicit UTF-8 so non-ASCII comment text does not depend on the locale.
    with open(RZTK_TMP_FILE_PATH, 'w', encoding='utf-8') as f:
        f.write(console.export_html())
    # Context manager closes the handle that was previously leaked.
    with open(RZTK_TMP_FILE_PATH, 'rb') as document:
        bot.send_document(message.chat.id, document, caption=title)
# Start receiving updates by polling. This is a module-level call with no
# __main__ guard, so it runs as soon as the file is executed or imported.
bot.polling()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment