Skip to content

Instantly share code, notes, and snippets.

@caiquearaujo
Created October 27, 2024 08:20
Show Gist options
  • Save caiquearaujo/b92108d20eb0ad29d9aab4193d748a13 to your computer and use it in GitHub Desktop.
Save caiquearaujo/b92108d20eb0ad29d9aab4193d748a13 to your computer and use it in GitHub Desktop.
Gerador de dados de CPF (sem autenticidade)
# produce.py
from multiprocessing import Pool
import random
import csv
def generate_cpf():
    """Return a random 11-digit string that never contains two zeros in a row.

    The digits are drawn one at a time. As soon as a zero directly follows
    another zero the partial candidate is thrown away and a fresh attempt
    starts. No check-digit computation is performed, so the result is only
    CPF-shaped, not a valid CPF.
    """
    while True:
        digits = []
        previous_was_zero = False
        rejected = False
        while len(digits) < 11:
            value = random.randint(0, 9)
            if value == 0 and previous_was_zero:
                # Second consecutive zero: discard this candidate entirely.
                rejected = True
                break
            previous_was_zero = value == 0
            digits.append(str(value))
        if not rejected:
            return ''.join(digits)
def generate_unique_cpfs_process(args):
    """Worker routine: build a set of distinct generated CPF strings.

    Args:
        args: A ``(quota, process_id)`` pair. ``quota`` is how many distinct
            values this worker must collect; ``process_id`` is only used in
            the progress messages.

    Returns:
        A set holding ``quota`` distinct strings produced by ``generate_cpf``.
    """
    quota, process_id = args
    print(f"Processo {process_id} iniciando a geração de CPFs...")
    collected = set()
    while True:
        if len(collected) >= quota:
            break
        # Duplicates are silently absorbed by the set, so keep drawing.
        collected.add(generate_cpf())
    print(f"Processo {process_id} completou a geração de CPFs.")
    return collected
def generate_unique_cpfs(total_cpfs=2_000_000, num_processes=20):
    """Generate ``total_cpfs`` distinct CPF-like strings and write them to cpfs.csv.

    The work is split evenly across ``num_processes`` worker processes. Each
    worker returns its own set of unique values; the sets are merged, topped
    up in the parent if cross-worker duplicates left the total short, trimmed
    to exactly ``total_cpfs`` entries, and written one per row to ``cpfs.csv``
    in the current working directory.

    Args:
        total_cpfs: Exact number of rows to write. Must be >= 0.
        num_processes: Number of worker processes to start. Must be >= 1.

    Raises:
        ValueError: If ``total_cpfs`` is negative or ``num_processes`` is
            less than 1.
    """
    if num_processes < 1:
        raise ValueError("num_processes must be at least 1")
    if total_cpfs < 0:
        raise ValueError("total_cpfs must not be negative")

    print("Iniciando a geração de CPFs com multiprocessing...")

    # Ceiling division: every worker gets an equal share, rounded up so the
    # shares add up to at least total_cpfs.
    cpfs_per_process = (total_cpfs + num_processes - 1) // num_processes
    args_list = [(cpfs_per_process, i) for i in range(num_processes)]

    with Pool(processes=num_processes) as pool:
        cpf_sets = pool.map(generate_unique_cpfs_process, args_list)

    # Workers are independent, so the same value may appear in more than one
    # set; the union removes those cross-worker duplicates.
    cpf_set = set().union(*cpf_sets)

    # Rounding the per-worker share up can overshoot the request; keep only
    # the number of entries that was asked for.
    cpf_list = list(cpf_set)[:total_cpfs]

    # Cross-worker duplicates can leave the merged result short; top it up
    # here. cpf_set still holds every value seen so far, so it remains a
    # valid "already used" check.
    while len(cpf_list) < total_cpfs:
        cpf = generate_cpf()
        if cpf not in cpf_set:
            cpf_set.add(cpf)
            cpf_list.append(cpf)

    with open('cpfs.csv', 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        for idx, cpf in enumerate(cpf_list, 1):
            csv_writer.writerow([cpf])
            if idx % 100_000 == 0:
                print(f"Escritos {idx} CPFs")

    print("Geração de CPFs concluída.")
def main():
    """Script entry point: run the CPF generation with its default settings."""
    generate_unique_cpfs()
# Run only when executed as a script. The guard also keeps the module safe to
# import, which multiprocessing requires when worker processes re-import it.
if '__main__' == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment