Last active
April 23, 2023 15:21
-
-
Save estevecastells/1cd494907f505ff540b5a20769f2d1d2 to your computer and use it in GitHub Desktop.
Script to generate permutations of two elements at scale
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import itertools | |
import csv | |
# Function to read elements from the CSV file
def read_elements(file_path):
    """Return the first field of every non-empty row in a CSV file.

    Args:
        file_path: Path of the CSV file to read; it is decoded as UTF-8.

    Returns:
        A list of strings, one per row that contains at least one field,
        in file order.
    """
    # newline='' leaves line-ending handling to the csv module, as the csv
    # documentation recommends for files handed to csv.reader.
    with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
        # csv.reader yields an empty list for a blank line; skipping those
        # rows keeps row[0] from raising IndexError.
        return [row[0] for row in csv.reader(csvfile) if row]
# Function to write generated URLs to a CSV file
def write_urls_to_csv(file_path, urls):
    """Write each URL as its own single-column row of a CSV file.

    Args:
        file_path: Path of the CSV file to create or overwrite.
        urls: Iterable of URL strings; each one becomes one row.
    """
    # An explicit UTF-8 encoding keeps the output independent of the
    # platform's default encoding, so non-ASCII URLs are written reliably.
    # newline='' stops the text layer from translating the row terminators
    # that csv.writer emits.
    with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows([url] for url in urls)
# Load the first list of elements // Adjust the path if needed; it targets the Kaggle environment
element1_csv_path = '/kaggle/input/elements1/elements1.csv'
element1_list = read_elements(element1_csv_path)

# Load the second list of elements // Adjust the path if needed; it targets the Kaggle environment
element2_csv_path = '/kaggle/input/elements2/elements2.csv'
element2_list = read_elements(element2_csv_path)

# Pair every element1 with every element2 (Cartesian product) and join each
# pair into a single string ending in "/"
urls = [
    f"{element1}{element2}/"
    for element1, element2 in itertools.product(element1_list, element2_list)
]

# Save the generated URLs, one per row, to the output CSV file
output = '/kaggle/working/output.csv'
write_urls_to_csv(output, urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script has been tested in the Kaggle environment, where it generated a CSV file of 3M rows (230 MB), so it should be able to scale to a decent