#!/bin/bash
# Run the cleanScoringJob batch once per input chunk (input0000 .. input0249),
# appending the stdout and stderr of every run to one shared log file.
log_file=./logs/clean_scoring_all_product.log

# Without the log directory the >> redirect below fails on every iteration
# and nothing is ever logged, so create it up front.
mkdir -p -- "${log_file%/*}"

for ((chunk = 0; chunk <= 249; chunk++)); do
  # Zero-pad to 4 digits so the name has the form input0000, input0001, ...
  printf -v i '%04d' "$chunk"
  echo "=========================================================================================================="
  input_file="input${i}"
  printf '%s\n' "$input_file"
  # System properties (-D) are JVM options, so they are placed before -jar,
  # which is the order the java launcher documents.
  if ! /app/jdk/bin/java -Xmx2048m -Dtarget="$input_file" \
    -jar cleanscoring-batch-0.0.1.jar cleanScoringJob >> "$log_file" 2>&1; then
    # Keep going with the remaining chunks, but make the failure visible.
    printf 'cleanScoringJob failed for %s (see %s)\n' "$input_file" "$log_file" >&2
  fi
done
# Cut all_product.csv into pieces of at most 100000 lines each. The pieces are
# named with the prefix "input" followed by a 4-digit numeric suffix
# (input0000, input0001, ...).
lines_per_chunk=100000
suffix_width=4
split -d -a "$suffix_width" -l "$lines_per_chunk" all_product.csv input
#!/bin/bash
# Concatenate every file whose name starts with "output" (in glob order) into
# result.csv.
# nullglob makes an unmatched pattern expand to nothing, so the no-files case
# can be detected instead of handing the literal string 'output*' to cat.
shopt -s nullglob
output_files=(output*)
shopt -u nullglob

if ((${#output_files[@]} == 0)); then
  # Do not open result.csv at all here: the redirect would truncate an
  # existing result or create an empty one.
  printf 'no output* files to concatenate; result.csv left untouched\n' >&2
  false # report the failure through the exit status without aborting
else
  cat -- "${output_files[@]}" > result.csv
fi
import csv
import gzip
import os
# Build all_product.csv from 01.csv: columns 0 and 1 are copied unchanged and
# column 2 is reduced to its first comma-separated piece with any leading or
# trailing '"' characters removed.
file_name = '01.csv'
out_file_name = 'all_product.csv'

# Start from an empty output file; it is opened in append mode below.
if os.path.exists(out_file_name):
    os.remove(out_file_name)

with open(file_name) as ifd, open(out_file_name, mode='a') as ofd:
    # QUOTE_NONE: the reader gives '"' no special meaning, so every comma
    # separates fields and quote characters stay inside the field text.
    csv_reader = csv.reader(ifd, delimiter=',', quoting=csv.QUOTE_NONE)
    csv_writer = csv.writer(ofd, delimiter=',')
    try:
        for row in csv_reader:
            # Named "url" in the original; presumably the third column holds
            # a quoted URL -- confirm against the input data.
            url = row[2].split(',')[0].strip('"')
            out_row = [row[0], row[1], url]
            # print(url)
            csv_writer.writerow(out_row)
    except Exception as error:
        # The try wraps the whole loop, so the first failing row (for example
        # one with fewer than three columns) ends the conversion; rows written
        # before it remain in the output file.
        # print(...) with a single argument behaves the same on Python 2 and
        # Python 3, whereas the bare print statement is Python 2 only.
        print(error)