nobodyzxc/rate_fetch.c

## rate_fetch.c
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/* Line buffer shared by main(), which fills it, and back(), which uses its
 * start address as the lower bound of the backward scan. */
char str[1000];

/*
 * Print the text of the table cell that ends at `tail`.
 *
 * `tail` must point into `str`, normally at the '<' of a closing "</td>".
 * The cell text starts just after the nearest preceding ';' or '>'.
 * When neither character occurs before `tail`, the scan stops at the
 * beginning of `str` instead of reading memory in front of the buffer.
 * The text is followed by two spaces.
 *
 * Always returns 0.
 */
int back(char *tail){
    const char *head = tail;

    /* When tail itself sits on a delimiter there is nothing to print. */
    if(*head != ';' && *head != '>')
        while(head > str && head[-1] != ';' && head[-1] != '>')
            head--;

    fwrite(head , 1 , (size_t)(tail - head) , stdout);
    printf("  ");
    return 0;
}
/*
 * Read lines from stdin and, for each line, print the text of up to `cnt`
 * cells (cnt = argv[1]), one output line per input line.
 *
 * Cells are located by their closing "</td>" tag. The search resumes right
 * after each match instead of overwriting the tag in place: the earlier
 * strncpy(tail, "ignore", 6) wrote 6 bytes over a 5-byte tag, so it also
 * clobbered the following byte, which is the terminating NUL when the tag
 * is the last thing in the buffer.
 *
 * Returns -1 when the argument count is wrong, 0 otherwise.
 */
int main(int argc , char *argv[]){
    static const char close_tag[] = "</td>";

    if(argc != 2){
        puts("Wrong argc");
        return -1;
    }
    int i , cnt = atoi(argv[1]);//cnt is column num
    char *tail , *cursor;
    while(fgets(str , sizeof(str) , stdin)){
        cursor = str;
        for(i = 0 ; i < cnt ; i++){
            tail = strstr(cursor , close_tag);
            if(tail == NULL)
                break;
            back(tail);
            /* skip the tag just handled so the next search finds the next cell */
            cursor = tail + (sizeof(close_tag) - 1);
        }
        puts("");
    }
    return 0;
}

## rate_item.py
import scrapy
class RateItem(scrapy.Item):
    """Scrapy item holding the two columns of data scraped from the page."""

    # List of currency labels.
    currency = scrapy.Field()
    # Flat list of rate values.
    value = scrapy.Field()

## rate_pipline.py
class RatePipeline(object):
    """Item pipeline that dumps the scraped rates to ``rate.txt``."""

    def process_item(self, item, spider):
        """Write ``item`` to ``rate.txt`` as a text table and return it.

        ``item['currency']`` is a sequence of labels and ``item['value']`` a
        flat sequence holding four values per label, in the order cash_in,
        cash_out, sight_in, sight_out. The file is rewritten on every call.

        ``spider`` is accepted for the pipeline interface and is not used.

        Raises IndexError when ``item['value']`` holds fewer than four
        entries for some label.
        """
        # Local import keeps this block self-contained.
        import io

        header = u'%20s %10s %10s %10s %10s' % (
            'currency', 'cash_in', 'cash_out', 'sight_in', 'sight_out')
        values = item['value']
        # Write text through an explicit UTF-8 encoder. Formatting
        # str(label.encode('utf-8')) puts the bytes repr (b'...') into the
        # file on Python 3 instead of the label itself.
        with io.open('rate.txt', 'w', encoding='utf-8') as out:
            out.write(header)
            out.write(u'\n')
            for row, country in enumerate(item['currency']):
                out.write(u'{0:->25}'.format(country))
                first = row * 4
                for offset in range(4):
                    out.write(u'%10s ' % values[first + offset])
                out.write(u'\n')
        return item

## rate_scrapy.sh
#!/bin/bash
# Download the rate page, compile the C extractor if needed, and print the
# extracted table.
#
# Usage: scrapy [ URL ]
#   URL  optional page to fetch instead of the default below.
#
# Exits non-zero when the download or the compilation fails.
URL="http://rate.bot.com.tw/Pages/Static/UIP003.zh-TW.htm"
COLS=10 #how many columns do you want to fetch...
C_FILE="fetch.c"

if [ $# -eq 1 ];then
    case "$1" in
      "h" | "help" | "-h" | "--help")
        echo "scrapy [ URL ]"
        exit 0;;
      *)
        URL="$1";;
    esac
fi

# Quote the URL so '&', '?' and spaces in it are not interpreted by the shell.
if ! wget -O bank_rate.html "$URL" 2>/dev/null;then
    echo "download file failed" >&2
    # wget -O creates the output file even when the download fails
    rm -f bank_rate.html
    exit 1
fi

if [ ! -f fetch.exe ];then
    if ! gcc -o fetch.exe "$C_FILE";then
        echo "compile fetch.c failed" >&2
        rm -f bank_rate.html
        exit 1
    fi
fi

grep decimal bank_rate.html | ./fetch.exe "$COLS" > rate.txt
cat rate.txt
rm -f bank_rate.html fetch.exe rate.txt

## rate_spider.py
import scrapy
import sys
from rate.items import RateItem

class RateSpider(scrapy.Spider):
    """Spider that scrapes the rate page listed in ``start_urls``."""

    name = "rate_spider"
    allowed_domains = ["bot.com.tw"]
    start_urls = ['http://rate.bot.com.tw/Pages/Static/UIP003.zh-TW.htm']

    def parse(self, response):
        """Return one RateItem built from the element with id ``slice1``.

        ``currency`` receives the text of every ``td.titleLeft`` cell and
        ``value`` the text of every ``td.decimal`` cell inside that element.
        """
        table = response.xpath('//*[@id="slice1"]')
        rates = RateItem()
        rates['currency'] = table.css('td.titleLeft::text').extract()
        rates['value'] = table.css('td.decimal::text').extract()
        return rates
	#include<stdio.h>
	#include<stdlib.h>
	#include<string.h>
	/* Line buffer shared by main(), which fills it, and back(), which uses
	 * its start address as the lower bound of the backward scan. */
	char str[1000];

	/*
	 * Print the text of the table cell that ends at `tail`.
	 *
	 * `tail` must point into `str`, normally at the '<' of a closing "</td>".
	 * The cell text starts just after the nearest preceding ';' or '>'.
	 * The characters are compared through the pointer (*head), not the
	 * pointer value itself. When neither character occurs before `tail`,
	 * the scan stops at the beginning of `str` instead of reading memory in
	 * front of the buffer. The text is followed by one space.
	 *
	 * Always returns 0.
	 */
	int back(char *tail){
	    const char *head = tail;

	    /* When tail itself sits on a delimiter there is nothing to print. */
	    if(*head != ';' && *head != '>')
	        while(head > str && head[-1] != ';' && head[-1] != '>')
	            head--;

	    fwrite(head , 1 , (size_t)(tail - head) , stdout);
	    printf(" ");
	    return 0;
	}
	/*
	 * Read lines from stdin and, for each line, print the text of up to
	 * `cnt` cells (cnt = argv[1]), one output line per input line.
	 *
	 * Cells are located by their closing "</td>" tag. The search resumes
	 * right after each match instead of overwriting the tag in place: the
	 * earlier strncpy(tail, "ignore", 6) wrote 6 bytes over a 5-byte tag, so
	 * it also clobbered the following byte, which is the terminating NUL
	 * when the tag is the last thing in the buffer.
	 *
	 * Returns -1 when the argument count is wrong, 0 otherwise.
	 */
	int main(int argc , char *argv[]){
	    static const char close_tag[] = "</td>";

	    if(argc != 2){
	        puts("Wrong argc");
	        return -1;
	    }
	    int i , cnt = atoi(argv[1]);//cnt is column num
	    char *tail , *cursor;
	    while(fgets(str , sizeof(str) , stdin)){
	        cursor = str;
	        for(i = 0 ; i < cnt ; i++){
	            tail = strstr(cursor , close_tag);
	            if(tail == NULL)
	                break;
	            back(tail);
	            /* skip the tag just handled so the next search finds the next cell */
	            cursor = tail + (sizeof(close_tag) - 1);
	        }
	        puts("");
	    }
	    return 0;
	}
	import scrapy
	class RateItem(scrapy.Item):
	    """Scrapy item holding the two columns of data scraped from the page."""

	    # List of currency labels.
	    currency = scrapy.Field()
	    # Flat list of rate values.
	    value = scrapy.Field()
	class RatePipeline(object):
	    """Item pipeline that dumps the scraped rates to ``rate.txt``."""

	    def process_item(self, item, spider):
	        """Write ``item`` to ``rate.txt`` as a text table and return it.

	        ``item['currency']`` is a sequence of labels and ``item['value']``
	        a flat sequence holding four values per label, in the order
	        cash_in, cash_out, sight_in, sight_out. The file is rewritten on
	        every call.

	        ``spider`` is accepted for the pipeline interface and is not used.

	        Raises IndexError when ``item['value']`` holds fewer than four
	        entries for some label.
	        """
	        # Local import keeps this block self-contained.
	        import io

	        header = u'%20s %10s %10s %10s %10s' % (
	            'currency', 'cash_in', 'cash_out', 'sight_in', 'sight_out')
	        values = item['value']
	        # Write text through an explicit UTF-8 encoder. Formatting
	        # str(label.encode('utf-8')) puts the bytes repr (b'...') into the
	        # file on Python 3 instead of the label itself.
	        with io.open('rate.txt', 'w', encoding='utf-8') as out:
	            out.write(header)
	            out.write(u'\n')
	            for row, country in enumerate(item['currency']):
	                out.write(u'{0:->25}'.format(country))
	                first = row * 4
	                for offset in range(4):
	                    out.write(u'%10s ' % values[first + offset])
	                out.write(u'\n')
	        return item
	#!/bin/bash
	# Download the rate page, compile the C extractor if needed, and print
	# the extracted table.
	#
	# Usage: scrapy [ URL ]
	#   URL  optional page to fetch instead of the default below.
	#
	# Exits non-zero when the download or the compilation fails.
	URL="http://rate.bot.com.tw/Pages/Static/UIP003.zh-TW.htm"
	COLS=10 #how many columns do you want to fetch...
	C_FILE="fetch.c"

	if [ $# -eq 1 ];then
	    case "$1" in
	      "h" | "help" | "-h" | "--help")
	        echo "scrapy [ URL ]"
	        exit 0;;
	      *)
	        URL="$1";;
	    esac
	fi

	# Quote the URL so '&', '?' and spaces in it are not interpreted by the shell.
	if ! wget -O bank_rate.html "$URL" 2>/dev/null;then
	    echo "download file failed" >&2
	    # wget -O creates the output file even when the download fails
	    rm -f bank_rate.html
	    exit 1
	fi

	if [ ! -f fetch.exe ];then
	    if ! gcc -o fetch.exe "$C_FILE";then
	        echo "compile fetch.c failed" >&2
	        rm -f bank_rate.html
	        exit 1
	    fi
	fi

	# Unescaped pipe: feed the matching lines to the extractor.
	grep decimal bank_rate.html | ./fetch.exe "$COLS" > rate.txt
	cat rate.txt
	rm -f bank_rate.html fetch.exe rate.txt
	import scrapy
	import sys
	from rate.items import RateItem

	class RateSpider(scrapy.Spider):
	    """Spider that scrapes the rate page listed in ``start_urls``."""

	    name = "rate_spider"
	    allowed_domains = ["bot.com.tw"]
	    start_urls = ['http://rate.bot.com.tw/Pages/Static/UIP003.zh-TW.htm']

	    def parse(self, response):
	        """Return one RateItem built from the element with id ``slice1``.

	        ``currency`` receives the text of every ``td.titleLeft`` cell and
	        ``value`` the text of every ``td.decimal`` cell inside that element.
	        """
	        item = RateItem()
	        all_data = response.xpath('//*[@id="slice1"]')
	        item['currency'] = all_data.css('td.titleLeft::text').extract()
	        item['value'] = all_data.css('td.decimal::text').extract()
	        return item