Skip to content

Instantly share code, notes, and snippets.

@johanlantz
Last active July 13, 2019 18:46
Show Gist options
  • Save johanlantz/943a15e9c19d700055dbdb7fa70db060 to your computer and use it in GitHub Desktop.
Save johanlantz/943a15e9c19d700055dbdb7fa70db060 to your computer and use it in GitHub Desktop.
# SELECT p.RTG_ID, p.RVW_PROD_ID, p.RVW_LANG_ID, p.RVW_DATE_ADD, p.RVW_STATUS, p.RVW_CUST_ID, psr.RTG_NOTE, cust.email, cust.firstname, cust.lastname, p.RVW_DATA FROM ps_gsr_review p LEFT JOIN ps_gsr_rating psr ON p.RTG_ID = psr.RTG_ID LEFT JOIN ps_customer cust ON cust.id_customer = p.RVW_CUST_ID
import csv
IDX_RAT_ID = 0
IDX_REV_PROD_ID = 1
IDX_REV_LANG_ID = 2
IDX_REV_DATE = 3
IDX_REV_STATUS = 4
IDX_REV_CUST_ID = 5
IDX_RTG_NOTE = 6
IDX_CUST_EMAIL = 7
IDX_CUST_FIRSTNAME = 8
IDX_CUST_LASTNAME = 9
yotpo_csv_headers = "published,review_title,review_content,review_score,date,product_id,display_name,email,comment_content,lang_id,cust_id"
def get_next_line_end(data, start):
end = data.find("}", start)
if end != -1:
return end + 4 #need to scan past the terminating }";\n to get to the next line
else:
return -1
def get_params_without_json_field(line):
end = line.find("a:4:{")
print ("Params line without json is", line[:end])
return line[:end]
def get_json_data_field(line):
start = line.find("a:4:{")
if start != -1:
end = data.find("}", start) +1
return line[start:end]
else:
return None
def get_field_value(field_name, json_data):
start = json_data.find(field_name) + len(field_name) + 5 # move to the s: part
end = json_data.find(":", start)
field_value_start = end +1
field_value_end = json_data.find("\";", start) +1
print ("God field value for " + field_name + " as " + json_data[field_value_start:field_value_end])
return json_data[field_value_start:field_value_end].replace('\n', ' ').replace('\r', '');
def remove_outer_quotes(line):
return line[1:len(line)-1]
start = 0
csvfile = open('formatted.csv', 'wb')
with open('x5.csv', 'rb') as f:
data = f.read()
outfile = open('formatted.csv', 'wb')
outfile.write(yotpo_csv_headers)
outfile.write("\n")
line_start = 0
# We must skip the first line with the headers
line_end = data.find("\n", 0)
line_start = line_end
line_end = get_next_line_end(data, line_start)
while line_end is not -1:
line = data[line_start:line_end]
print ("got line " + line , "\n")
params_list = get_params_without_json_field(line).split(";")
print ("got params ", params_list)
if params_list[IDX_REV_STATUS] == "0":
print ("Skipping review since status=0")
continue
json_data = get_json_data_field(line)
print ("Got json field as " + json_data)
outfile.write("true," + #published
remove_outer_quotes(get_field_value("sTitle", json_data)) + "," +
remove_outer_quotes(get_field_value("sComment", json_data)) + "," +
remove_outer_quotes(params_list[IDX_RTG_NOTE]) + "," +
remove_outer_quotes(params_list[IDX_REV_DATE]) + "," +
remove_outer_quotes(params_list[IDX_REV_PROD_ID]) + "," +
remove_outer_quotes(params_list[IDX_CUST_FIRSTNAME]) + " " + remove_outer_quotes(params_list[IDX_CUST_LASTNAME])[0] + "," +
remove_outer_quotes(params_list[IDX_CUST_EMAIL]) + "," +
"," + #comment content
remove_outer_quotes(params_list[IDX_REV_LANG_ID]) + "," +
remove_outer_quotes(params_list[IDX_REV_CUST_ID]) + "," + "\n")
# get the next line
line_start = line_end
line_end = get_next_line_end(data, line_start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment