Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save speedupmate/fb1f5e4927ae62e4c3c8e9a8986b10b9 to your computer and use it in GitHub Desktop.
Save speedupmate/fb1f5e4927ae62e4c3c8e9a8986b10b9 to your computer and use it in GitHub Desktop.
Flatten additional_attributes from Magento 2 export csv
"""
This script formats a magento product export file, and sets each
additional_attribute as a new column in a target file
"""
import csv
import pandas as pd
# testing files
# source_file = './importexport_605048471de17.csv'
# target_file = './importexport_605048471de177_formatted.csv'
# real files
source_file = './catalog_product_20210316_044037.csv'
target_file = './catalog_product_20210316_044037_formatted.csv'
def get_modified_headers():
reader = csv.reader(open(source_file, 'r'))
headers = next(reader)
# print('headers original')
# print(headers)
# get all headers for additional_attributes
additional_attributes_headers = []
i = 0
for row in reader:
# print(f'processing row: {i}')
target_index = headers.index("additional_attributes")
attributes = row[target_index].split(",")
for attribute in attributes:
data = attribute.split("=")
if len(data) != 1:
if data[0] != 'publish_date':
header = data[0]
# add attribute as header
if header not in additional_attributes_headers:
additional_attributes_headers.append(header)
# iterator
i += 1
# print('headers modified')
headers_modified = headers + additional_attributes_headers
# print(headers_modified)
return [headers_modified, additional_attributes_headers]
def run_with_writer():
""" Main run function """
reader = csv.reader(open(source_file, 'r'))
writer = csv.writer(open(target_file, 'w'))
# get original headers
headers = next(reader)
# print('original headers')
# print(headers)
# add additional attributes to headers
modified_headers = get_modified_headers()[0]
additional_attributes_headers = get_modified_headers()[1]
# print('modified headers')
# print(modified_headers)
# print('additional_attribute headers')
# print(additional_attributes_headers)
# write headers to file
writer.writerow(modified_headers)
# extract data and populate rows
i = 0
for row in reader:
print(f'processing row: {i}')
# print('original row')
# print(row)
result = [None] * len(additional_attributes_headers)
# print(result)
# pre-populate result (to avoid shifting items as we add insert them at the index)
target_index = headers.index("additional_attributes")
# print(row[target_index])
attributes = row[target_index].split(",")
# print(attributes)
for attribute in attributes:
# print(attribute)
data = attribute.split("=")
if len(data) != 1:
if data[0] != 'publish_date':
header = data[0]
value = data[1]
# print('get target index for value')
target_index = additional_attributes_headers.index(header)
# print(f'insert {value} at index {target_index}')
result[target_index] = value
modified_row = row + result
# print('modified headers')
# print(modified_headers)
# print('modified row')
# print(modified_row)
# write data
# write original row
# writer.writerow(row)
# write modified row
writer.writerow(modified_row)
# break loop
# if i == 10:
# break
i += 1
# Main entry point
if __name__ == '__main__':
run_with_writer()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment