Skip to content

Instantly share code, notes, and snippets.

@kiichi54321
Created October 30, 2012 15:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kiichi54321/3980898 to your computer and use it in GitHub Desktop.
Save kiichi54321/3980898 to your computer and use it in GitHub Desktop.
Rawler ブログからの記事、コメント、画像の取得
<Data Stock="True" xmlns="clr-namespace:Rawler.Tool;assembly=Rawler" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" >
<CreateFolder FolderName="data"></CreateFolder>
<TsvReadLines FileName="data/NameUrlList.tsv">
<Iterator>
<!-- Source tree of the enclosing Iterator: starting from the "Url" column of the
     current TSV row, walk the entry-list pages and hand every entry link to the
     iterator through IteratorSourceAddText. -->
<Iterator.SourceTree>
  <GetTsvValue ColumnName="Url">
    <!-- NOTE(review): presumably empties the iterator source left over from the
         previous TSV row; confirm against the Rawler implementation. -->
    <IteratorSourceClear/>
    <Page>
      <!-- Entry list markup: div filtered by "recent_entries_list", then ul, then li. -->
      <Tags Tag="div" ParameterFilter="recent_entries_list">
        <Tags Tag="ul">
          <Tags Tag="li">
            <Links>
              <!-- Property element is named after its owner element (Links), matching
                   the nextPage Links further down; it was previously "Link.Query". -->
              <Links.Query>
                <QueryFirst/>
              </Links.Query>
              <IteratorSourceAddText/>
            </Links>
          </Tags>
        </Tags>
      </Tags>
      <!-- Pagination of the entry list: take the first link filtered by "nextPage"
           and pass it to NextPage. -->
      <Links TagFilter="nextPage">
        <Links.Query>
          <QueryFirst/>
        </Links.Query>
        <NextPage/>
      </Links>
    </Page>
  </GetTsvValue>
</Iterator.SourceTree>
<Report></Report>
<Page VisbleErr="True">
<PageOnce>
<!-- Blog entry rows, appended to data/BlogData.tsv.
     Attributes written per row: Name, PageUrl, Title, Date, contents. -->
<FileSave FileName="data/BlogData.tsv" FileSaveMode="Append" Stock="False">
  <!-- Name comes from the input TSV row; PageUrl from GetPageUrl with UrlType="Start". -->
  <GetTsvValue ColumnName="Name">
    <DataWrite Attribute="Name"/>
  </GetTsvValue>
  <GetPageUrl UrlType="Start">
    <DataWrite Attribute="PageUrl"/>
  </GetPageUrl>

  <Tags Tag="div" ClassName="entry">
    <!-- Title: first h3 with class "title", HTML comments removed, trimmed. -->
    <Tags Tag="h3" ClassName="title">
      <Tags.Query>
        <QueryFirst/>
      </Tags.Query>
      <RemoveHtmlComment>
        <Trim DoChopReturnCode="True">
          <DataWrite Attribute="Title"/>
        </Trim>
      </RemoveHtmlComment>
    </Tags>

    <!-- Date: span with class "date", trimmed. -->
    <Tags Tag="span" ClassName="date">
      <Trim DoChopReturnCode="True">
        <DataWrite Attribute="Date"/>
      </Trim>
    </Tags>

    <!-- Body: text clipped between the google_ad_section_start and
         google_ad_section_end markers (UseInner="False").
         NOTE(review): the appended Footer presumably closes the trailing marker so
         that RemoveHtmlComment can strip it; confirm. -->
    <ClipText StartClip="&lt;!-- google_ad_section_start" EndClip="&lt;!-- google_ad_section_end" UseInner="False">
      <AppendText Footer="--&gt;">
        <RemoveHtmlComment>
          <Trim DoChopReturnCode="True">
            <DataWrite Attribute="contents"/>
          </Trim>
        </RemoveHtmlComment>
      </AppendText>
    </ClipText>
  </Tags>

  <NextDataRow/>
</FileSave>
<!-- Image rows, appended to data/ImageData.tsv.
     Attributes written per row: Name, PageUrl, folder, image. -->
<FileSave FileName="data/ImageData.tsv" FileSaveMode="Append" Stock="False">
  <!-- Same clip markers as the entry body (google_ad_section_start / google_ad_section_end). -->
  <ClipText StartClip="&lt;!-- google_ad_section_start" EndClip="&lt;!-- google_ad_section_end" UseInner="False">
    <AppendText Footer="--&gt;">
      <!-- Only image links passing the ".jpg" UrlFilter are processed. -->
      <ImageLinks UrlFilter=".jpg">
        <GetTsvValue ColumnName="Name">
          <DataWrite Attribute="Name"/>
        </GetTsvValue>
        <GetPageUrl UrlType="Start">
          <DataWrite Attribute="PageUrl"/>
        </GetPageUrl>

        <DownLoad>
          <!-- Folder name is built as "data/" + Name + "image". -->
          <DownLoad.FolderNameTree>
            <GetTsvValue ColumnName="Name">
              <AppendText Header="data/" Footer="image"/>
            </GetTsvValue>
          </DownLoad.FolderNameTree>

          <!-- The same folder string is recorded in the "folder" attribute. -->
          <GetTsvValue ColumnName="Name">
            <AppendText Header="data/" Footer="image">
              <DataWrite Attribute="folder"/>
            </AppendText>
          </GetTsvValue>

          <GetFileName>
            <DataWrite Attribute="image"/>
          </GetFileName>
        </DownLoad>

        <NextDataRow/>
      </ImageLinks>
    </AppendText>
  </ClipText>
</FileSave>
</PageOnce>
<!-- Comment rows, appended to data/CommentData.tsv.
     Attributes written per row: Name, Url, Date, Title, Content, Author, AuthorUrl. -->
<FileSave FileSaveMode="Append" FileName="data/CommentData.tsv" Stock="False">
  <!-- One clipped span per comment: from the each_comment div start tag to the
       each_comment end marker. -->
  <ClipText StartClip="&lt;div class=&quot;each_comment&quot;&gt;" EndClip="&lt;!--//.each_comment--&gt;">
    <GetTsvValue ColumnName="Name">
      <DataWrite Attribute="Name"/>
    </GetTsvValue>
    <GetPageUrl UrlType="Start">
      <DataWrite Attribute="Url"/>
    </GetPageUrl>

    <!-- Date: text between the comment_date span and the comment_gotoform span,
         passed through TagClear and Trim. -->
    <ClipText StartClip="&lt;span class=&quot;comment_date&quot;&gt;" EndClip="&lt;span class=&quot;comment_gotoform&quot;&gt;">
      <TagClear>
        <Trim DoChopReturnCode="True">
          <DataWrite Attribute="Date"/>
        </Trim>
      </TagClear>
    </ClipText>

    <!-- Title: p with class "label", trimmed. -->
    <Tags Tag="p" ClassName="label">
      <Trim DoChopReturnCode="True">
        <DataWrite Attribute="Title"/>
      </Trim>
    </Tags>

    <!-- Content: text between the comment_body paragraph and the comment_footer div,
         passed through TagClear and Trim. -->
    <ClipText StartClip="&lt;p class=&quot;comment_body&quot;&gt;" EndClip="&lt;div class=&quot;comment_footer&quot;&gt;">
      <TagClear>
        <Trim DoChopReturnCode="True">
          <DataWrite Attribute="Content"/>
        </Trim>
      </TagClear>
    </ClipText>

    <!-- Author text and author link come from the comment_author span. -->
    <Tags Tag="span" ClassName="comment_author">
      <TagClear>
        <DataWrite Attribute="Author"/>
      </TagClear>
      <Links VisbleType="Url">
        <DataWrite Attribute="AuthorUrl"/>
      </Links>
    </Tags>

    <NextDataRow/>
  </ClipText>
</FileSave>
<!-- Comment pagination: inside the div with class "page commentPaging", the link
     filtered by "nextPage" is passed to NextPage.
     NOTE(review): QueryDistinct presumably de-duplicates repeated paging blocks, and
     SleepSecondTime="1" presumably waits one second before the next request; confirm. -->
<Tags Tag="div" ClassName="page commentPaging">
  <Tags.Query>
    <QueryDistinct/>
  </Tags.Query>
  <Links TagFilter="nextPage" IsSingle="True">
    <NextPage SleepSecondTime="1"/>
  </Links>
</Tags>
</Page>
</Iterator>
</TsvReadLines>
</Data>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment