Skip to content

Instantly share code, notes, and snippets.

@Yukaii
Created December 25, 2014 07:00
Show Gist options
  • Save Yukaii/6820762069a947500a63 to your computer and use it in GitHub Desktop.
Save Yukaii/6820762069a947500a63 to your computer and use it in GitHub Desktop.
webscraper.io sitemap for opentech 全華網路書店。不過挺多頁的，用來爬連結就好了。
{
  "_id": "opentech",
  "startUrl": "http://www.opentech.com.tw/search/result.asp",
  "selectors": [
    {
      "id": "nextpage",
      "type": "SelectorLink",
      "parentSelectors": ["_root", "nextpage"],
      "selector": "p a:nth-of-type(2)",
      "multiple": true,
      "delay": ""
    },
    {
      "id": "pagelink",
      "type": "SelectorLink",
      "parentSelectors": ["_root", "nextpage"],
      "selector": "p td:nth-of-type(3) a",
      "multiple": true,
      "delay": ""
    },
    {
      "id": "title",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "td.fw1 p",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "author",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "td.fw tr:contains('作(譯)者:') td.fw p",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "cover_price",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "#___01 > tbody > tr:nth-child(3) > td > table:nth-child(2) > tbody > tr > td:nth-child(3) > table > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(3) > td.fw > table:nth-child(2) > tbody > tr > td > table > tbody > tr:nth-child(2) > td:nth-child(2) > table > tbody > tr > td.fw > table:nth-child(2) > tbody > tr > td > font:nth-child(1)",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "isbn",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "#___01 > tbody > tr:nth-child(3) > td > table:nth-child(2) > tbody > tr > td:nth-child(3) > table > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(3) > td.fw > table:nth-child(2) > tbody > tr > td > table > tbody > tr:nth-child(2) > td:nth-child(2) > table > tbody > tr > td.fw > table:nth-child(4) > tbody > tr > td > span",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "price",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "#___01 > tbody > tr:nth-child(3) > td > table:nth-child(2) > tbody > tr > td:nth-child(3) > table > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(3) > td.fw > table:nth-child(2) > tbody > tr > td > table > tbody > tr:nth-child(2) > td:nth-child(2) > table > tbody > tr > td.fw > table:nth-child(2) > tbody > tr > td > font:nth-child(3)",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "publisher",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "#___01 > tbody > tr:nth-child(3) > td > table:nth-child(2) > tbody > tr > td:nth-child(3) > table > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(3) > td.fw > table:nth-child(2) > tbody > tr > td > table > tbody > tr:nth-child(2) > td:nth-child(2) > table > tbody > tr > td.fw > table:nth-child(4) > tbody > tr > td",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "feature",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "p.fw10:nth-of-type(2)",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "content",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "p.fw10:nth-of-type(3)",
      "multiple": false,
      "regex": "",
      "delay": ""
    },
    {
      "id": "toc",
      "type": "SelectorText",
      "parentSelectors": ["pagelink"],
      "selector": "p.fw10:nth-of-type(4)",
      "multiple": false,
      "regex": "",
      "delay": ""
    }
  ]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment