Last active
June 2, 2016 11:49
-
-
Save werdan/894d65d9a47beae8f9a3b8c3bcf42f86 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Refresh the Emex price archives in the shared working directory: drop the
# combined CSV and the archives left by the previous run, download the current
# archives and unpack them in place.

# Abort if the working directory is unavailable: the rm/wget/unrar calls below
# would otherwise act on whatever directory the script was started from.
cd /var/www/shared/emex || {
  echo "error: cannot cd to /var/www/shared/emex" >&2
  exit 1
}

# all.csv is appended to later in this script, so it has to start out absent.
# -f: a missing file (e.g. on the very first run) is not an error.
rm -f -- all.csv
rm -f -- ./*.rar

# Brace expansion turns this into one URL per supplier archive.
# A failed download is reported but is not fatal, so the archives that did
# arrive are still processed.
wget ftp://emexonline.com/megaprice/{EMIR,EMIJ,EMIS,EMIN,EMIT,EUSA,EMIZ,EMIL,EURU,FAST,KORA,KOSA,OPTA}.rar \
  || echo "warning: wget reported a failure; some archives may be missing" >&2

# Unpack every archive; -o+ overwrites existing files without prompting.
find . -name "*.rar" -exec unrar x -o+ {} \;
# Build the per-supplier and the combined price lists from the extracted
# *.txt price files found under the current directory.
#
# Lookup tables (comma-separated, re-sorted in place on every run):
#   sku.lst        SKU,ID,BRAND,PN
#                  e.g. ABS_0001Q,2731821,A.B.S.,0001Q
#   brands.lst     EMEX_BRAND_CODE,BOODMO_BRAND_NAME,BOODMO_BRAND_ID
#                  e.g. !!,ARMSTER,4478
#   suppliers.lst  supplier code (= price file base name),supplier id
#                  see https://gist.github.com/werdan/20606e3f02837f83e1b7d9ac407e443a
#
# Outputs (all in the current directory):
#   <SUPPLIER>.csv   one file per processed <SUPPLIER>.txt
#   all.csv          every processed line, matched or not (kept for analytics)
#   all_matched.csv  only the lines whose part id was found in sku.lst
# Line format of all three:
#   supplier_id,part_id,brand_code,number,price,qty,enabled

# Sort comma-separated data by its first field, byte-wise.
# join compares the first field only, so both of its inputs must be ordered by
# that field alone: a whole-line sort puts "!!,x" before "!,x" (because "!"
# sorts below ","), which makes join miss pairs. LC_ALL=C is used instead of
# LANG=en_EN because LANG is overridden by any LC_ALL in the environment and
# sort and join must agree on the collation.
sort_by_key() {
  LC_ALL=C sort -t, -k1,1 "$@"
}

# Left join on the first field: every line of the first input is kept (-a1),
# with empty output fields where the second input has no matching key.
join_by_key() {
  LC_ALL=C join -a1 -j1 -t, "$@"
}

#######################################
# Convert one tab-separated WINDOWS-1251 price file into <SUPPLIER>.csv and
# append the result to all.csv. The source file is deleted on success and
# left in place on failure.
# Arguments: $1 - path to <SUPPLIER>.txt; its first three columns are read as
#                 brand code, part number and price, the first line is skipped
# Outputs:   progress message on stdout, errors on stderr
# Returns:   0 on success, 1 if any stage of the conversion failed
#######################################
process_price_file() {
  local src=$1
  local name supplier out
  name=${src##*/}
  # Strip only the extension, so a base name containing dots stays intact.
  supplier=${name%.txt}
  out="$supplier.csv"

  printf 'Processing %s.txt ...\n' "$supplier"

  # The subshell keeps pipefail local; with it a failing iconv or join is
  # noticed instead of silently producing a truncated list.
  # NOTE(review): the sample sku.lst row (ABS_0001Q for brand A.B.S.) suggests
  # brand punctuation is stripped in SKUs, but only the part number is
  # sanitised below - confirm against the real data.
  if ! (
    set -o pipefail
    # 1. Convert to UTF-8 and turn tabs into commas.
    # 2. Drop the header line, keep brand code and part number, apply the
    #    price formula and round down.
    # 3. Replace the brand code with the brand name from brands.lst.
    # 4. Prefix each line with the supplier code and swap it for the supplier
    #    id from suppliers.lst (every line carries the same key here, so no
    #    sort is needed before this join).
    # 5. Build the SKU from brand + part number, add qty=1 and enabled=1.
    # 6. Attach the part id of SKUs that are currently in sku.lst.
    iconv -f WINDOWS-1251 -t UTF-8 "$src" \
      | tr '\t' ',' \
      | awk -F, -v OFS=',' 'NR > 1 { print $1, $2, int(($3 * 1.4 + 50) * 67) }' \
      | sort_by_key \
      | join_by_key -o 2.2,1.2,1.3 - brands.lst \
      | supplier_code="$supplier" awk '{ print ENVIRON["supplier_code"] "," $0 }' \
      | join_by_key -o 1.2,1.3,1.4,2.2 - suppliers.lst \
      | awk -F, -v OFS=',' '{
          gsub(/[^[:alnum:]]/, "", $2)
          print toupper($1) "_" toupper($2), $3, $2, $1, $4, "1", "1"
        }' \
      | sort_by_key \
      | join_by_key -o 1.5,2.2,1.4,1.3,1.2,1.6,1.7 - sku.lst
  ) > "$out"; then
    echo "error: failed to process $src; source file left in place" >&2
    rm -f -- "$out"
    return 1
  fi

  rm -f -- "$src"
  cat -- "$out" >> all.csv
}

#######################################
# Sort the lookup tables, process every *.txt price file and derive
# all_matched.csv from all.csv.
# Returns: 0 on success, 1 if a lookup table is missing or any file failed
#######################################
build_price_lists() {
  local lookup src status=0
  local -a sources=()

  # Stop before touching any input when a lookup table is missing; the joins
  # could not produce anything useful without it.
  for lookup in sku.lst brands.lst suppliers.lst; do
    if [[ ! -r "$lookup" ]]; then
      echo "error: lookup table $lookup is missing or unreadable" >&2
      return 1
    fi
    sort_by_key -o "$lookup" "$lookup" || return 1
  done

  # Collect the file list up front (NUL-delimited, so any file name is safe)
  # because the files are deleted while the list is being worked through.
  while IFS= read -r -d '' src; do
    sources+=("$src")
  done < <(find . -name "*.txt" -type f -print0)

  for src in "${sources[@]}"; do
    process_price_file "$src" || status=1
  done

  # Removing not-matched lines; done once, after all files were appended.
  if [[ -f all.csv ]]; then
    awk -F, -v OFS=',' '$2>0 {print $1,$2,$3,$4,$5,$6,$7}' all.csv > all_matched.csv \
      || status=1
  fi

  return "$status"
}

build_price_lists
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment