Skip to content

Instantly share code, notes, and snippets.

@luxu
Created November 5, 2022 16:19
Show Gist options
  • Save luxu/ad57db9ceaddf4c4e2a370449ce1a9a5 to your computer and use it in GitHub Desktop.
Save luxu/ad57db9ceaddf4c4e2a370449ce1a9a5 to your computer and use it in GitHub Desktop.
#!/usr/local/bin/python
# coding: utf-8
import itertools
from collections import OrderedDict
from decimal import Decimal
import numpy as np
from html_table_parser.parser_functions import extract_tables, make2d
from requests import get, packages
class Estatistica:
    """Statistics over past lottery draws read from a local HTML results table.

    On construction the first table of ``data/d_mega.htm`` is parsed into
    ``self.data`` (a list of rows, each row a list of cell strings). The
    analysis methods read these cells by position:

    * column 1      -- label printed as the start/end of the analysed range
                       (presumably the draw date; confirm against the file)
    * columns 2..7  -- the six drawn numbers
    * column 9      -- number of winners of the draw
    * column 12     -- prize value in Brazilian notation (``1.234,56``);
                       it is multiplied by the number of winners

    Attributes:
        numbers: maps each drawn number (cell string) to its occurrence count.
        winners: winners per draw, as ints, in row order.
        prizes: total prize paid per draw, as ``Decimal``, in row order.
        sorted_numbers: ``numbers`` ordered from most to least frequent.
        listed_numbers: keys of ``sorted_numbers`` in that same order.
        more_frequent_numbers: the 10 most frequent numbers, sorted ascending.
        less_frequent_numbers: the 10 least frequent numbers, sorted descending.
    """

    # Positions of the cells used by the analysis inside each parsed row.
    _RANGE_LABEL_COLUMN = 1
    _NUMBER_COLUMNS = range(2, 8)
    _WINNERS_COLUMN = 9
    _PRIZE_COLUMN = 12

    # Seconds to wait for the lottery API before giving up on a request.
    _REQUEST_TIMEOUT = 30

    # Swaps "," and "." to turn "1,234.56" into the Brazilian "1.234,56".
    _BRL_SEPARATORS = str.maketrans(",.", ".,")

    def __init__(self):
        """Create empty statistics and load the draws from the local file."""
        self.numbers = {}
        self.winners = []
        self.prizes = []
        # Empty containers (not 0) so the attributes always have the same
        # type they hold after an analysis method has run.
        self.sorted_numbers = OrderedDict()
        self.listed_numbers = []
        self.more_frequent_numbers = []
        self.less_frequent_numbers = []
        self.open_csv()

    def _rank_numbers(self):
        """Recount every drawn number from ``self.data`` and rank by frequency.

        The count is rebuilt from scratch on every call, so calling the
        public analysis methods repeatedly (or one after the other) never
        inflates the totals.
        """
        counts = {}
        for line in self.data:
            for column in self._NUMBER_COLUMNS:
                number = line[column]
                counts[number] = counts.get(number, 0) + 1
        self.numbers = counts
        # sorted() is stable: numbers with equal counts keep first-seen order.
        self.sorted_numbers = OrderedDict(
            sorted(counts.items(), key=lambda item: item[1], reverse=True)
        )
        self.listed_numbers = list(self.sorted_numbers)

    def show_statistics(self):
        """Print a summary of the past draws held in ``self.data``.

        Recomputes ``numbers``, ``winners``, ``prizes`` and the frequency
        rankings, then prints the number of draws, total and average winners,
        total and average prize money, and the 10 most / least frequent
        numbers. When there are no draws, a short notice is printed instead
        of the summary.

        Raises:
            ValueError: if a winners cell is not an integer.
            decimal.InvalidOperation: if a prize cell is not a number.
        """
        print("Analisando...")
        self._rank_numbers()

        # Start from empty lists so a second call does not double the totals.
        self.winners = []
        self.prizes = []
        for line in self.data:
            winners_qty = int(line[self._WINNERS_COLUMN])
            self.winners.append(winners_qty)
            # "1.234,56" -> "1234.56" so Decimal can parse it.
            prize_value = line[self._PRIZE_COLUMN].replace(".", "").replace(",", ".")
            self.prizes.append(Decimal(prize_value) * Decimal(winners_qty))

        self.more_frequent_numbers = sorted(self.listed_numbers[:10])
        self.less_frequent_numbers = sorted(self.listed_numbers[-10:], reverse=True)

        if not self.data:
            # Nothing to summarise; avoids indexing an empty list below.
            print("Nenhum concurso encontrado.")
            return

        contests = len(self.data)
        total_winners = sum(self.winners)
        # Builtin sum keeps exact Decimal arithmetic for the money values.
        total_prizes = sum(self.prizes, Decimal(0))
        first_label = self.data[0][self._RANGE_LABEL_COLUMN]
        last_label = self.data[-1][self._RANGE_LABEL_COLUMN]

        print(f"\nConcursos de {first_label} até {last_label}:")
        print(f" Concursos realizados: {self.format_number(contests)}\n")
        print(f" Total de ganhadores: {self.format_number(total_winners)}")
        print(
            " Média de ganhadores por concurso: "
            f"{self.format_number(total_winners / contests)}\n"
        )
        print(f" Total em prêmios concedidos: R$ {self.format_number(int(total_prizes))}")
        print(
            " Média de prêmio por concurso: R$ "
            f"{self.format_number(int(total_prizes / contests))}\n"
        )
        print(f' Os 10 números mais frequêntes: {", ".join(self.more_frequent_numbers)}')
        print(f' Os 10 números menos frequêntes: {", ".join(self.less_frequent_numbers)}')

    def format_number(self, number):
        """Format a number using Brazilian separators.

        Args:
            number: an ``int`` (rendered without decimals) or any other
                numeric value (rendered with exactly two decimals).

        Returns:
            The formatted string, with "." as thousands separator and ","
            as decimal separator, e.g. ``1234567`` -> ``"1.234.567"`` and
            ``1234.5`` -> ``"1.234,50"``.
        """
        if isinstance(number, int):
            return f"{number:,}".replace(",", ".")
        return f"{number:,.2f}".translate(self._BRL_SEPARATORS)

    def open_csv(self):
        """Load the draws from ``data/d_mega.htm`` into ``self.data``.

        Despite the name, the file is HTML: its first table is parsed and
        the first row is dropped (presumably the header row).
        """
        # NOTE(review): the file is opened with the platform default
        # encoding; confirm the file's real encoding and pass it explicitly.
        with open("data/d_mega.htm") as file:
            tables = extract_tables(file.read())
        self.data = make2d(tables[0])[1:]

    def scrapping_site(self, nro):
        """Fetch one contest from the lottery API and print its result.

        Args:
            nro: contest number appended to the API URL.

        Prints the contest number, the draw date and the list of drawn
        numbers taken from the JSON response. Returns nothing.
        """
        # url = f"https://servicebus2.caixa.gov.br/portaldeloterias/api/megasena/{nro}"
        url = f"https://servicebus2.caixa.gov.br/portaldeloterias/api/lotofacil/{nro}"
        # NOTE(review): verify=False turns off TLS certificate validation and
        # the warning about it is silenced here; confirm this is intended.
        packages.urllib3.disable_warnings(packages.urllib3.exceptions.InsecureRequestWarning)
        # A timeout keeps an unresponsive server from blocking forever.
        response = get(url, verify=False, timeout=self._REQUEST_TIMEOUT)
        infos = response.json()
        sorteio_numero = infos["numero"]
        data_do_sorteio = infos["dataApuracao"]
        numeros_sorteados = infos["listaDezenas"]
        print(sorteio_numero, data_do_sorteio, numeros_sorteados)

    def retornar_os_10_mais_sorteados(self):
        """Return the 10 most frequently drawn numbers as sorted ints.

        Recomputes the frequency ranking from ``self.data`` and stores the
        result in ``self.more_frequent_numbers`` as well.

        Returns:
            Up to 10 ints in ascending numeric order.

        Raises:
            ValueError: if a number cell cannot be converted to ``int``.
        """
        self._rank_numbers()
        # Convert before sorting so the order is numeric, not lexicographic.
        self.more_frequent_numbers = sorted(int(n) for n in self.listed_numbers[:10])
        return self.more_frequent_numbers
if __name__ == "__main__":
    # Script entry point: query the API once per contest number, from 2500
    # up to and including 2655, printing each result as it arrives.
    first_contest, last_contest = 2500, 2655
    stats = Estatistica()
    for contest in range(first_contest, last_contest + 1):
        stats.scrapping_site(contest)
    # Offline analysis of the local results file, currently disabled:
    # stats.show_statistics()
    # top_10 = stats.retornar_os_10_mais_sorteados()
    # print(top_10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment