Created
September 14, 2013 08:34
-
-
Save shanehou/6560022 to your computer and use it in GitHub Desktop.
代码没写好,数据多的话就会超边界……另外,err的处理很蛋疼,不知道啥方法比较好,求指导……
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/csv" | |
"fmt" | |
"io" | |
"io/ioutil" | |
"net/http" | |
"net/http/cookiejar" | |
"net/url" | |
"os" | |
"regexp" | |
"strings" | |
"time" | |
) | |
func processData(input string, output string) (err error) { | |
inputFile, err := os.Open(input) | |
outputFile, err := os.Create(output) | |
if err != nil { | |
return | |
} | |
defer inputFile.Close() | |
defer outputFile.Close() | |
reader := csv.NewReader(inputFile) | |
writer := csv.NewWriter(outputFile) | |
for record, err := reader.Read(); err != io.EOF; record, err = reader.Read() { | |
if err != nil { | |
return err | |
} | |
count, err := searchCount(record[0], record[1], strings.Replace(record[2], " ", "", -1)) | |
if err != nil { | |
return err | |
} | |
record = append(record, count) | |
fmt.Println(record) | |
err = writer.Write(record) | |
if err != nil { | |
return err | |
} | |
} | |
return nil | |
} | |
// resultCountRE captures the hit count from the result page, which reports it
// as " 找到 N 条结果 " ("found N results"). It is compiled once at package scope
// instead of on every searchCount call.
var resultCountRE = regexp.MustCompile(" 找到 (\\d+) 条结果 ")

// searchTimeout bounds each HTTP request so that a stalled server cannot hang
// the whole run.
const searchTimeout = 30 * time.Second

// searchCount queries the full-text newspaper database (DbPrefix "CCND") on
// epub.cnki.net for articles published during year in the four securities
// newspapers listed in magazine_value1 whose full text contains stock OR
// name, and returns the number of hits as the decimal string shown on the
// result page.
//
// The lookup takes two GET requests sharing one cookie jar: the first goes to
// SearchHandler.ashx with the query parameters, and its response body is
// appended as the "pagename" query of the second request to brief.aspx, whose
// HTML carries the count.
//
// An error is returned if either request fails or answers with a non-200
// status, or if the result page does not contain the expected count marker;
// count is empty in all of those cases.
func searchCount(stock string, year string, name string) (count string, err error) {
	u, err := url.Parse("http://epub.cnki.net")
	if err != nil {
		return "", err
	}
	u.Path += "/KNS/request/SearchHandler.ashx"

	v := url.Values{}
	v.Add("action", "")
	v.Add("NaviCode", "*")
	v.Add("ua", "1.21")
	v.Add("PageName", "ASP.brief_result_aspx")
	v.Add("DbPrefix", "CCND")
	v.Add("DbCatalog", "中国重要报纸全文数据库")
	v.Add("ConfigFile", "CCND.xml")
	v.Add("db_opt", "中国重要报纸全文数据库")
	v.Add("db_value", "中国重要报纸全文数据库")
	v.Add("magazine_value1", "中国证券报+上海证券报+证券时报+证券日报")
	v.Add("magazine_special1", "=")
	v.Add("publishdate_from", year+"-01-01")
	v.Add("publishdate_to", year+"-12-31")
	v.Add("au_1_sel", "AU")
	v.Add("au_1_special1", "=")
	v.Add("txt_1_sel", "FT")
	v.Add("txt_1_value1", stock)
	v.Add("txt_1_value2", name)
	v.Add("txt_1_relation", "#CNKI_OR")
	v.Add("txt_1_special1", "%")
	v.Add("his", "0")
	// The "__" parameter is a timestamp string; the "GMT+0800 (CST)" suffix is
	// literal text in the layout, not derived from the local time zone.
	const layout = "Mon Jan 02 2006 15:04:05 GMT+0800 (CST)"
	v.Add("__", time.Now().Format(layout))
	u.RawQuery = v.Encode()

	// A fresh jar per call lets cookies set by the first response accompany
	// the second request without leaking between searches.
	jar, err := cookiejar.New(nil)
	if err != nil {
		return "", err
	}
	// Keyed fields: the positional form breaks whenever http.Client gains a
	// field (it already has since Timeout was added).
	c := &http.Client{Jar: jar, Timeout: searchTimeout}

	pageName, err := fetchBody(c, u.String())
	if err != nil {
		return "", fmt.Errorf("submitting search for stock %q, year %q: %w", stock, year, err)
	}

	// Trim surrounding whitespace so a trailing newline in the handler's
	// response cannot produce an invalid URL.
	resultURL := "http://epub.cnki.net/kns/brief/brief.aspx?pagename=" + strings.TrimSpace(string(pageName))
	page, err := fetchBody(c, resultURL)
	if err != nil {
		return "", fmt.Errorf("fetching results for stock %q, year %q: %w", stock, year, err)
	}

	// FindSubmatch returns nil when the marker is absent; indexing it
	// unconditionally would panic.
	match := resultCountRE.FindSubmatch(page)
	if match == nil {
		return "", fmt.Errorf("result count not found in response for stock %q, year %q", stock, year)
	}
	return string(match[1]), nil
}

// fetchBody issues a GET for rawURL with c and returns the whole response
// body. The body is closed on every path, and any status other than 200 OK is
// reported as an error.
func fetchBody(c *http.Client, rawURL string) ([]byte, error) {
	resp, err := c.Get(rawURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", rawURL, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return body, nil
}
func main() { | |
err := processData("testdata.csv", "output.csv") | |
if err != nil { | |
panic(err) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
000012 | 2010 | 南 玻A | 71237587 | 59649365 | 74 | |||
---|---|---|---|---|---|---|---|---|
000012 | 2011 | 南 玻A | 97496836 | 84435273 | 88 | |||
000014 | 2010 | 沙河股份 | 12821932.22 | 12821932.22 | 14 | |||
000014 | 2011 | 沙河股份 | 9785185.39 | 9785185.39 | 10 | |||
000020 | 2010 | 深华发A | 1939765 | 1201976.99 | 8 | |||
000020 | 2011 | 深华发A | 5926290.77 | 4925343.09 | 2 | |||
000020 | 2012 | 深华发A | 6449675.03 | 5142319.36 | 17 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment