Skip to content

Instantly share code, notes, and snippets.

@WinstonN
Last active March 16, 2021 19:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.
Save WinstonN/bc3c1fa4e32c266669b9eb5c91d981c7 to your computer and use it in GitHub Desktop.
Flatten additional_attributes from Magento 2 export csv
"""
This script formats a magento product export file, and sets each
additional_attribute as a new column in a target file
"""
import csv
# real files
source_file = './catalog_product_20210316_044037.csv'
target_file = './catalog_product_20210316_044037_formatted.csv'
def get_modified_headers():
reader = csv.reader(open(source_file, 'r'))
headers = next(reader)
# get all headers for additional_attributes
additional_attributes_headers = []
i = 0
for row in reader:
target_index = headers.index("additional_attributes")
attributes = row[target_index].split(",")
for attribute in attributes:
data = attribute.split("=")
if len(data) != 1:
if data[0] != 'publish_date':
header = data[0]
# add attribute as header
if header not in additional_attributes_headers:
additional_attributes_headers.append(header)
# iterator
i += 1
headers_modified = headers + additional_attributes_headers
return [headers_modified, additional_attributes_headers]
def run_with_writer():
""" Main run function """
reader = csv.reader(open(source_file, 'r'))
writer = csv.writer(open(target_file, 'w'))
# get original headers
headers = next(reader)
# add additional attributes to headers
modified_headers = get_modified_headers()[0]
additional_attributes_headers = get_modified_headers()[1]
# write headers to file
writer.writerow(modified_headers)
# extract data and populate rows
i = 0
for row in reader:
print(f'processing row: {i}')
result = [None] * len(additional_attributes_headers)
# pre-populate result (to avoid shifting items as we add insert them at the index)
target_index = headers.index("additional_attributes")
attributes = row[target_index].split(",")
for attribute in attributes:
data = attribute.split("=")
if len(data) != 1:
if data[0] != 'publish_date':
header = data[0]
value = data[1]
target_index = additional_attributes_headers.index(header)
result[target_index] = value
modified_row = row + result
# write modified row
writer.writerow(modified_row)
i += 1
# Main entry point
if __name__ == '__main__':
run_with_writer()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment