# werdan/EMEX pre-processor

## EMEX pre-processor
#!/bin/bash
#
# EMEX price-list pre-processor.
#
# Downloads the EMEX "megaprice" archives, unpacks them and converts every
# extracted <SUPPLIER>.txt price list into <SUPPLIER>.csv with the line format
#   supplier_id,part_id,brand_code,number,price,qty,enabled
# Every per-supplier file is appended to all.csv (matched and not-matched
# lines, kept for analytics); all_matched.csv keeps only the lines whose
# part_id was found in sku.lst.
#
# Reads brands.lst, suppliers.lst and sku.lst from the working directory and
# re-sorts them in place.

# Every rm/glob below is relative, so carrying on after a failed cd would
# delete and create files in whatever directory the script was started from.
cd /var/www/shared/emex || exit 1

# -f: leftovers missing from a previous run are not an error.
rm -f -- all.csv
rm -f -- *.rar

# The URL is deliberately unquoted: the shell brace-expands it into one URL
# per archive. A failed download is reported but does not stop the run, so the
# archives that did arrive are still processed.
wget ftp://emexonline.com/megaprice/{EMIR,EMIJ,EMIS,EMIN,EMIT,EUSA,EMIZ,EMIL,EURU,FAST,KORA,KOSA,OPTA}.rar \
  || echo "Warning: wget reported a failure, some archives may be missing" >&2

find . -name "*.rar" -exec unrar x -o+ {} \;

# Pre-sorting SKU and Brands mapping
#
# join(1) needs its inputs ordered on the join field itself, compared in the
# same collation that join uses. Hence "-t, -k1,1" (order by the first comma
# separated field, not by the whole line) and LC_ALL=C (takes precedence over
# any LANG/LC_* inherited from the environment) on every sort and join.

## SKU,ID,BRAND,PN
## ABS_0001Q,2731821,A.B.S.,0001Q
LC_ALL=C sort -t, -k1,1 -o sku.lst sku.lst
## EMEX_BRAND_CODE,BOODMO_BRAND_NAME,BOODMO_BRAND_ID
## !!,ARMSTER,4478
LC_ALL=C sort -t, -k1,1 -o brands.lst brands.lst
## https://gist.github.com/werdan/20606e3f02837f83e1b7d9ac407e443a
LC_ALL=C sort -t, -k1,1 -o suppliers.lst suppliers.lst

# Collect the price lists up front (NUL-delimited, so unusual file names
# survive) because the loop below creates and deletes files in this tree.
txt_files=()
while IFS= read -r -d '' f; do
  txt_files+=("$f")
done < <(find . -name "*.txt" -type f -print0)

for f in "${txt_files[@]}"; do
  # Supplier code = file name up to its first dot (EMIR.txt -> EMIR).
  file=${f##*/}
  file=${file%%.*}
  echo "Processing $file.txt ..."

  # Replace tabs with commas, then convert to UTF8.
  # $'..' hands sed a literal tab, so this does not rely on GNU sed's "\t".
  sed $'s/\t/,/g' "$f" | iconv -f WINDOWS-1251 -t UTF-8 > "$file.tmp"

  # Skip the header line (NR>1), keep the first two fields and the converted
  # price, rounded down.
  # NOTE(review): 1.4 / 50 / 67 look like markup, fixed surcharge and currency
  # rate - confirm with the pricing owner.
  awk -F, -v OFS=',' 'NR>1 {print $1,$2,int(($3*1.4+50)*67)}' "$file.tmp" > "$file.csv"

  # Joining with brand name information -> brand_name,number,price
  LC_ALL=C join -a1 -j1 -t, -o2.2,1.2,1.3 \
    <(LC_ALL=C sort -t, -k1,1 "$file.csv") brands.lst > "$file.csv2"

  # Prefix each line with Supplier code
  awk -v code="$file" '{print code "," $0}' "$file.csv2" > "$file.csv"

  # Joining with supplier id information -> brand_name,number,price,supplier_id
  LC_ALL=C join -a1 -j1 -t, -o1.2,1.3,1.4,2.2 \
    <(LC_ALL=C sort -t, -k1,1 "$file.csv") suppliers.lst > "$file.csv2"

  # Creating SKUs from Brand + PN + qty=1 + enabled=1
  # -> sku,price,number,brand_name,supplier_id,1,1 (number stripped to alnum)
  awk -F, -v OFS=',' '{gsub(/[^[:alnum:]]/,"",$2); print toupper($1)"_"toupper($2),$3,$2,$1,$4,"1","1"}' "$file.csv2" > "$file.csv3"

  rm -f -- "$file.csv"

  # Joining with SKUs that are currently in our DB as parts.
  # -a1 keeps lines without a match; their part_id field is left empty.
  LC_ALL=C join -a1 -j1 -t, -o1.5,2.2,1.4,1.3,1.2,1.6,1.7 \
    <(LC_ALL=C sort -t, -k1,1 "$file.csv3") sku.lst > "$file.csv"

  # Removing temp files and the processed price list
  rm -f -- "$f" "$file.tmp" "$file.csv2" "$file.csv3"

  # Gluing all together - file that contains both matched and not-matched lines
  # We need it for analytics
  # Line format
  # supplier_id,part_id,brand_code,number,price,qty,enabled
  cat "$file.csv" >> all.csv
done

# Removing not-matched lines (empty part_id). Done once after the loop; the
# result is the same as rebuilding it from all.csv after every supplier.
# Nothing is written when no price list was processed.
if [[ -f all.csv ]]; then
  awk -F, -v OFS=',' '$2>0 {print $1,$2,$3,$4,$5,$6,$7}' all.csv > all_matched.csv
fi
	#!/bin/bash
	#
	# EMEX price-list pre-processor.
	#
	# Downloads the EMEX "megaprice" archives, unpacks them and converts every
	# extracted <SUPPLIER>.txt price list into <SUPPLIER>.csv with the line format
	#   supplier_id,part_id,brand_code,number,price,qty,enabled
	# Every per-supplier file is appended to all.csv (matched and not-matched
	# lines, kept for analytics); all_matched.csv keeps only the lines whose
	# part_id was found in sku.lst.
	#
	# Reads brands.lst, suppliers.lst and sku.lst from the working directory and
	# re-sorts them in place.

	# Every rm/glob below is relative, so carrying on after a failed cd would
	# delete and create files in whatever directory the script was started from.
	cd /var/www/shared/emex || exit 1

	# -f: leftovers missing from a previous run are not an error.
	rm -f -- all.csv
	rm -f -- *.rar

	# The URL is deliberately unquoted: the shell brace-expands it into one URL
	# per archive. A failed download is reported but does not stop the run, so the
	# archives that did arrive are still processed.
	wget ftp://emexonline.com/megaprice/{EMIR,EMIJ,EMIS,EMIN,EMIT,EUSA,EMIZ,EMIL,EURU,FAST,KORA,KOSA,OPTA}.rar \
	  || echo "Warning: wget reported a failure, some archives may be missing" >&2

	find . -name "*.rar" -exec unrar x -o+ {} \;

	# Pre-sorting SKU and Brands mapping
	#
	# join(1) needs its inputs ordered on the join field itself, compared in the
	# same collation that join uses. Hence "-t, -k1,1" (order by the first comma
	# separated field, not by the whole line) and LC_ALL=C (takes precedence over
	# any LANG/LC_* inherited from the environment) on every sort and join.

	## SKU,ID,BRAND,PN
	## ABS_0001Q,2731821,A.B.S.,0001Q
	LC_ALL=C sort -t, -k1,1 -o sku.lst sku.lst
	## EMEX_BRAND_CODE,BOODMO_BRAND_NAME,BOODMO_BRAND_ID
	## !!,ARMSTER,4478
	LC_ALL=C sort -t, -k1,1 -o brands.lst brands.lst
	## https://gist.github.com/werdan/20606e3f02837f83e1b7d9ac407e443a
	LC_ALL=C sort -t, -k1,1 -o suppliers.lst suppliers.lst

	# Collect the price lists up front (NUL-delimited, so unusual file names
	# survive) because the loop below creates and deletes files in this tree.
	txt_files=()
	while IFS= read -r -d '' f; do
	  txt_files+=("$f")
	done < <(find . -name "*.txt" -type f -print0)

	for f in "${txt_files[@]}"; do
	  # Supplier code = file name up to its first dot (EMIR.txt -> EMIR).
	  file=${f##*/}
	  file=${file%%.*}
	  echo "Processing $file.txt ..."

	  # Replace tabs with commas, then convert to UTF8.
	  # $'..' hands sed a literal tab, so this does not rely on GNU sed's "\t".
	  sed $'s/\t/,/g' "$f" | iconv -f WINDOWS-1251 -t UTF-8 > "$file.tmp"

	  # Skip the header line (NR>1), keep the first two fields and the converted
	  # price, rounded down.
	  # NOTE(review): 1.4 / 50 / 67 look like markup, fixed surcharge and currency
	  # rate - confirm with the pricing owner.
	  awk -F, -v OFS=',' 'NR>1 {print $1,$2,int(($3*1.4+50)*67)}' "$file.tmp" > "$file.csv"

	  # Joining with brand name information -> brand_name,number,price
	  LC_ALL=C join -a1 -j1 -t, -o2.2,1.2,1.3 \
	    <(LC_ALL=C sort -t, -k1,1 "$file.csv") brands.lst > "$file.csv2"

	  # Prefix each line with Supplier code
	  awk -v code="$file" '{print code "," $0}' "$file.csv2" > "$file.csv"

	  # Joining with supplier id information -> brand_name,number,price,supplier_id
	  LC_ALL=C join -a1 -j1 -t, -o1.2,1.3,1.4,2.2 \
	    <(LC_ALL=C sort -t, -k1,1 "$file.csv") suppliers.lst > "$file.csv2"

	  # Creating SKUs from Brand + PN + qty=1 + enabled=1
	  # -> sku,price,number,brand_name,supplier_id,1,1 (number stripped to alnum)
	  awk -F, -v OFS=',' '{gsub(/[^[:alnum:]]/,"",$2); print toupper($1)"_"toupper($2),$3,$2,$1,$4,"1","1"}' "$file.csv2" > "$file.csv3"

	  rm -f -- "$file.csv"

	  # Joining with SKUs that are currently in our DB as parts.
	  # -a1 keeps lines without a match; their part_id field is left empty.
	  LC_ALL=C join -a1 -j1 -t, -o1.5,2.2,1.4,1.3,1.2,1.6,1.7 \
	    <(LC_ALL=C sort -t, -k1,1 "$file.csv3") sku.lst > "$file.csv"

	  # Removing temp files and the processed price list
	  rm -f -- "$f" "$file.tmp" "$file.csv2" "$file.csv3"

	  # Gluing all together - file that contains both matched and not-matched lines
	  # We need it for analytics
	  # Line format
	  # supplier_id,part_id,brand_code,number,price,qty,enabled
	  cat "$file.csv" >> all.csv
	done

	# Removing not-matched lines (empty part_id). Done once after the loop; the
	# result is the same as rebuilding it from all.csv after every supplier.
	# Nothing is written when no price list was processed.
	if [[ -f all.csv ]]; then
	  awk -F, -v OFS=',' '$2>0 {print $1,$2,$3,$4,$5,$6,$7}' all.csv > all_matched.csv
	fi