Created
November 5, 2022 16:19
-
-
Save luxu/ad57db9ceaddf4c4e2a370449ce1a9a5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python | |
# coding: utf-8 | |
import itertools | |
from collections import OrderedDict | |
from decimal import Decimal | |
import numpy as np | |
from html_table_parser.parser_functions import extract_tables, make2d | |
from requests import get, packages | |
class Estatistica:
    """Statistics over past lottery draws plus a small client for the results API.

    The draw history is loaded once, at construction time, from the local
    file ``data/d_mega.htm`` into ``self.data`` (a list of table rows).
    Within each row this class reads index 1 as the label printed for the
    first/last draw, indices 2-7 as the drawn numbers, index 9 as the number
    of winners and index 12 as the prize value.
    """

    def __init__(self):
        """Initialise empty result containers and load the draw history."""
        # Occurrence count per drawn number, keyed by the cell text.
        self.numbers = {}
        # One entry per draw: winner count and total prize paid.
        self.winners = []
        self.prizes = []
        # Derived views, filled in by the analysis methods.
        self.sorted_numbers = OrderedDict()
        self.listed_numbers = []
        self.more_frequent_numbers = []
        self.less_frequent_numbers = []
        self.open_csv()

    def _count_numbers(self):
        """Recount how often each number was drawn and rank the numbers.

        The tally is rebuilt from scratch on every call so that repeated
        analyses never double-count. Updates ``self.numbers``,
        ``self.sorted_numbers`` (most frequent first) and
        ``self.listed_numbers`` (the ranked keys).
        """
        counts = {}
        for line, column in itertools.product(self.data, range(2, 8)):
            drawn = line[column]
            counts[drawn] = counts.get(drawn, 0) + 1
        self.numbers = counts
        # sorted() is stable, so ties keep their first-seen order.
        ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
        self.sorted_numbers = OrderedDict(ranked)
        self.listed_numbers = list(self.sorted_numbers)

    def show_statistics(self):
        """Print summary statistics about the loaded draws.

        Recomputes the number frequencies, the per-draw winner counts and the
        per-draw prize totals, stores them on the instance and prints a
        report. If no draws are loaded, a short notice is printed instead.
        """
        print("Analisando...")
        if not self.data:
            # Nothing to index into or average over.
            print("Nenhum concurso encontrado.")
            return

        self._count_numbers()

        # Start from empty lists so calling this method twice does not
        # double the totals.
        self.winners = []
        self.prizes = []
        for line in self.data:
            winners_qty = int(line[9])
            self.winners.append(winners_qty)
            # Prize text uses "." for thousands and "," for decimals;
            # normalise it before handing it to Decimal.
            prize_value = line[12].replace(".", "").replace(",", ".")
            self.prizes.append(Decimal(prize_value) * Decimal(winners_qty))

        self.more_frequent_numbers = sorted(self.listed_numbers[:10])
        self.less_frequent_numbers = sorted(self.listed_numbers[-10:], reverse=True)

        # Plain sum/division keeps the Decimal values exact and avoids
        # routing Decimal objects through array routines.
        contests = len(self.data)
        total_winners = sum(self.winners)
        total_prizes = sum(self.prizes)

        print(f"\nConcursos de {self.data[0][1]} até {self.data[-1][1]}:")
        print(f" Concursos realizados: {self.format_number(contests)}\n")
        print(f" Total de ganhadores: {self.format_number(total_winners)}")
        print(f" Média de ganhadores por concurso: {self.format_number(total_winners / contests)}\n")
        print(f" Total em prêmios concedidos: R$ {self.format_number(int(total_prizes))}")
        print(f" Média de prêmio por concurso: R$ {self.format_number(int(total_prizes / contests))}\n")
        print(f' Os 10 números mais frequêntes: {", ".join(self.more_frequent_numbers)}')
        print(f' Os 10 números menos frequêntes: {", ".join(self.less_frequent_numbers)}')

    def format_number(self, number):
        """Format a number the Brazilian way ("." thousands, "," decimals).

        Integers are rendered without decimals (``1234567`` -> ``"1.234.567"``);
        any other number is rendered with two decimals
        (``1234.5`` -> ``"1.234,50"``).
        """
        if isinstance(number, int):
            return f"{number:,}".replace(",", ".")
        # Format with US separators, then swap "," and "." in a single pass.
        return f"{number:,.2f}".translate(str.maketrans(",.", ".,"))

    def open_csv(self):
        """Load the draw history into ``self.data``.

        Despite the name, the input is the HTML file ``data/d_mega.htm``: the
        first table found in it is converted to a 2-D list and its first row
        is dropped.
        """
        with open("data/d_mega.htm") as file:
            tables = extract_tables(file.read())
        self.data = make2d(tables[0])[1:]

    def scrapping_site(self, nro, loteria="lotofacil"):
        """Fetch one draw from the Caixa lottery API, print it and return it.

        Args:
            nro: Draw number, appended to the API URL.
            loteria: Lottery path segment of the API URL (for example
                ``"lotofacil"`` or ``"megasena"``).

        Returns:
            A ``(numero, dataApuracao, listaDezenas)`` tuple taken from the
            JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        url = f"https://servicebus2.caixa.gov.br/portaldeloterias/api/{loteria}/{nro}"
        packages.urllib3.disable_warnings(packages.urllib3.exceptions.InsecureRequestWarning)
        # NOTE(review): verify=False disables TLS certificate validation, so
        # the response could be tampered with in transit. Kept as in the
        # original; confirm whether the endpoint still requires it.
        response = get(url, verify=False, timeout=30)
        # Fail with a clear HTTP error instead of a JSON/KeyError further down.
        response.raise_for_status()
        infos = response.json()
        sorteio_numero = infos["numero"]
        data_do_sorteio = infos["dataApuracao"]
        numeros_sorteados = infos["listaDezenas"]
        print(sorteio_numero, data_do_sorteio, numeros_sorteados)
        return sorteio_numero, data_do_sorteio, numeros_sorteados

    def retornar_os_10_mais_sorteados(self):
        """Return the ten most frequently drawn numbers as sorted integers.

        The frequencies are recounted on every call. The result is also
        stored in ``self.more_frequent_numbers``.
        """
        self._count_numbers()
        # Convert before sorting so the order is numeric, not lexicographic.
        self.more_frequent_numbers = sorted(int(n) for n in self.listed_numbers[:10])
        return self.more_frequent_numbers
if __name__ == "__main__":
    # Query the results API for every draw numbered 2500 through 2655.
    primeiro_concurso = 2500
    ultimo_concurso = 2655
    estatistic = Estatistica()
    for nro in range(primeiro_concurso, ultimo_concurso + 1):
        estatistic.scrapping_site(nro)
    # The offline analysis entry points are currently disabled:
    # estatistic.show_statistics()
    # os_10_mais = estatistic.retornar_os_10_mais_sorteados()
    # print(os_10_mais)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment