-
-
Save caiquearaujo/b92108d20eb0ad29d9aab4193d748a13 to your computer and use it in GitHub Desktop.
Gerador de dados de CPF (sem autenticidade)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# produce.py
from multiprocessing import Pool | |
import random | |
import csv | |
def generate_cpf():
    """Return a random 11-digit string that never contains two zeros in a row.

    The digits are purely random: no CPF check digits are computed, so the
    result is only CPF-shaped test data. Whenever a second consecutive zero
    is drawn, the partial attempt is discarded and a new one is started.
    """
    cpf_length = 11
    while True:
        drawn = []
        previous_was_zero = False
        while len(drawn) < cpf_length:
            value = random.randint(0, 9)
            is_zero = value == 0
            if is_zero and previous_was_zero:
                # Second zero in a row: abandon this attempt and start over.
                break
            previous_was_zero = is_zero
            drawn.append(str(value))
        # The inner loop only reaches full length when it was never aborted.
        if len(drawn) == cpf_length:
            return ''.join(drawn)
def generate_unique_cpfs_process(args):
    """Worker routine: build a set of distinct generated CPFs.

    Args:
        args: A ``(total_cpfs_per_process, process_id)`` tuple. The first
            item is how many distinct values to collect; the second is only
            used to label the progress messages.

    Returns:
        A set containing ``total_cpfs_per_process`` distinct CPF strings
        (empty when the requested count is zero or negative).
    """
    wanted, process_id = args
    print(f"Processo {process_id} iniciando a geração de CPFs...")

    collected = set()
    while True:
        if len(collected) >= wanted:
            break
        # Adding to a set silently drops repeats, so keep drawing until the
        # set itself reaches the requested size.
        collected.add(generate_cpf())

    print(f"Processo {process_id} completou a geração de CPFs.")
    return collected
def generate_unique_cpfs(total_cpfs=2_000_000, num_processes=20):
    """Generate ``total_cpfs`` distinct CPF strings and write them to ``cpfs.csv``.

    The work is divided between ``num_processes`` worker processes, each of
    which produces its own set of distinct values. The per-worker sets are
    merged, trimmed or topped up to exactly ``total_cpfs`` entries, and
    written one value per row.

    Args:
        total_cpfs: Number of distinct CPFs to write.
        num_processes: Number of worker processes in the pool.

    Raises:
        ValueError: If ``num_processes`` is smaller than 1.
    """
    print("Iniciando a geração de CPFs com multiprocessing...")
    if num_processes < 1:
        # Same exception type Pool would raise; checked here so the division
        # below can never fail with ZeroDivisionError instead.
        raise ValueError("Number of processes must be at least 1")

    # Ceiling division: each worker gets an equal share, rounded up, so the
    # shares together cover total_cpfs without every worker generating the
    # full amount.
    cpfs_per_process = (total_cpfs + num_processes - 1) // num_processes
    args_list = [(cpfs_per_process, i) for i in range(num_processes)]
    with Pool(processes=num_processes) as pool:
        cpf_sets = pool.map(generate_unique_cpfs_process, args_list)

    # Workers are independent, so the same value may appear in several sets;
    # the union removes those cross-process duplicates.
    cpf_set = set.union(*cpf_sets)
    cpf_list = list(cpf_set)
    # Rounding the share up can yield a few extra values; drop the surplus so
    # the output has exactly the requested number of rows.
    del cpf_list[total_cpfs:]

    # Top up whatever the cross-process duplicates removed.
    while len(cpf_list) < total_cpfs:
        cpf = generate_cpf()
        if cpf not in cpf_set:
            cpf_set.add(cpf)
            cpf_list.append(cpf)

    with open('cpfs.csv', 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        for idx, cpf in enumerate(cpf_list, 1):
            csv_writer.writerow([cpf])
            if idx % 100_000 == 0:
                print(f"Escritos {idx} CPFs")
    print("Geração de CPFs concluída.")
def main():
    """Script entry point: run the CPF generation with its default settings."""
    generate_unique_cpfs()
# Only start the generation when this file is executed as a script, not when
# it is imported (multiprocessing may import the main module in its workers).
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment