Skip to content

Instantly share code, notes, and snippets.

@kinoh
Created October 4, 2014 10:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kinoh/a20f08cd519c7354099a to your computer and use it in GitHub Desktop.
Save kinoh/a20f08cd519c7354099a to your computer and use it in GitHub Desktop.
scraping parameters
<?xml version="1.0" encoding="utf-8" ?>
<!--
  Scraping parameters for pages whose address starts with the Uri given on Site.

  Each Field carries a Name, an XPath expression (Path) and a regular expression
  (Pattern). The program that reads this file is not part of it; presumably it
  evaluates Path against the fetched page and applies Pattern to the selected
  text. Confirm against the consumer before relying on that.

  Patterns use the (?'name'...) named-group syntax.
-->
<Site Uri="http://www.kinokuniya.co.jp/f/dsg-01-"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:xsd="http://www.w3.org/2001/XMLSchema">

  <!-- Textual substitution: "liitemprop" becomes "li itemprop".
       Presumably repairs a missing space in the page markup before parsing. -->
  <Preprocess xsi:type="Replace"
              Old="liitemprop"
              New="li itemprop"/>

  <!-- Heading marked itemprop='name': the last text node is the title,
       every earlier text node is the label. -->
  <Field Name="Title"
         Path="//h3[@itemprop='name']/text()[last()]"
         Pattern=".+"/>
  <Field Name="Label"
         Path="//h3[@itemprop='name']/text()[position()!=last()]"
         Pattern=".+"/>

  <!-- The fields below read list items of the div whose class contains 'infobox'. -->

  <!-- 'name' is a run of characters other than / 【 】 〈 〉. The lookahead requires
       that it is followed either by an optional 〈spell〉, optional /... segments,
       a 【role】 and an optional 〈spell〉, or by the end of the input.
       The group name 'spell' is used twice on purpose (before and after the role). -->
  <Field Name="Authors"
         Path="//div[contains(@class,'infobox')]/ul/li[1]"
         Pattern="(?'name'[^/【】〈〉]+)(?=(?:〈(?'spell'[^〈〉]+)〉)?(?:/[^【]+)*【(?'role'[^】]+)】(?:〈(?'spell'[^〈〉]+)〉)?|$)"/>

  <!-- The link inside the last list item that contains at least one link. -->
  <Field Name="Publisher"
         Path="//div[contains(@class,'infobox')]/ul/li[count(./a)>0][last()]/a"
         Pattern=".+"/>

  <!-- Four-digit year, slash, two-digit month, followed by the literal 発売. -->
  <Field Name="Published"
         Path="//div[contains(@class,'infobox')]/ul/li[4]"
         Pattern="(?'year'\d{4})/(?'month'\d{2})発売"/>

  <!-- The fields below read a div whose class contains 'infbox' (no 'o').
       NOTE(review): this looks like a different block from 'infobox' above, since
       li[1] is used for different data in each. Verify against the live page
       before "correcting" the spelling. -->

  <!-- Text after "サイズ " up to the next slash (lazy match). -->
  <Field Name="Size"
         Path="//div[contains(@class,'infbox')]/ul/li[1]"
         Pattern="サイズ (.+?)/"/>

  <!-- Digits between "ページ数 " and "p". -->
  <Field Name="Page"
         Path="//div[contains(@class,'infbox')]/ul/li[1]"
         Pattern="ページ数 (\d+)p"/>

  <!-- A run of exactly thirteen digits. -->
  <Field Name="ISBN"
         Path="//div[contains(@class,'infbox')]/ul/li[2]"
         Pattern="\d{13}"/>

  <!-- Three digits with an optional decimal part. -->
  <Field Name="NDC"
         Path="//div[contains(@class,'infbox')]/ul/li[3]"
         Pattern="\d{3}(?:\.\d+)?"/>

  <!-- The src attribute of the image marked itemprop='image'.
       AdditionalArgs="CurrentUri" is interpreted by the consumer; presumably it
       supplies the page address for resolving a relative src (TODO confirm). -->
  <Field Name="Image"
         Path="//img[@itemprop='image']/@src"
         Pattern=".+"
         AdditionalArgs="CurrentUri"/>

</Site>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment