Created
October 30, 2012 15:22
-
-
Save kiichi54321/3980898 to your computer and use it in GitHub Desktop.
Rawler ブログからの記事、コメント、画像の取得
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!--
  Rawler script: fetch blog articles, comments and images.

  Flow, as far as this markup shows. The exact semantics of each element
  belong to Rawler.Tool; notes marked "presumably" are assumptions drawn
  from element names and should be confirmed against Rawler itself.
    1. Create the "data" folder and read data/NameUrlList.tsv, which must
       provide the columns "Name" and "Url".
    2. For each TSV row, Iterator.SourceTree loads the page at "Url",
       collects the first link of every entry in the recent_entries_list
       block and follows the "nextPage" link.
    3. For each collected article page the script appends rows to
       data/BlogData.tsv, data/ImageData.tsv and data/CommentData.tsv and
       downloads the .jpg images.

  Output column names are inconsistent between the files ("PageUrl" vs.
  "Url", "contents" vs. "Content"). They are kept unchanged on purpose so
  that previously written TSV files stay compatible.
-->
<Data Stock="True"
      xmlns="clr-namespace:Rawler.Tool;assembly=Rawler"
      xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml">
  <CreateFolder FolderName="data"/>
  <TsvReadLines FileName="data/NameUrlList.tsv">
    <Iterator>
      <!-- Builds the list of article URLs; presumably the Iterator then walks this list. -->
      <Iterator.SourceTree>
        <GetTsvValue ColumnName="Url">
          <!-- Presumably resets the URL list so rows do not accumulate across blogs. -->
          <IteratorSourceClear/>
          <Page>
            <!-- Article index: div "recent_entries_list" > ul > li. -->
            <Tags Tag="div" ParameterFilter="recent_entries_list">
              <Tags Tag="ul">
                <Tags Tag="li">
                  <Links>
                    <!-- Property element must be named after its owner element, Links. -->
                    <Links.Query>
                      <QueryFirst/>
                    </Links.Query>
                    <IteratorSourceAddText/>
                  </Links>
                </Tags>
              </Tags>
            </Tags>
            <!-- Paging of the article index. -->
            <Links TagFilter="nextPage">
              <Links.Query>
                <QueryFirst/>
              </Links.Query>
              <NextPage/>
            </Links>
          </Page>
        </GetTsvValue>
      </Iterator.SourceTree>
      <Report/>
      <!-- "VisbleErr" is spelled as in the original script; do not correct it here. -->
      <Page VisbleErr="True">
        <!--
          Article body and images. Presumably PageOnce limits this part to
          the first load of an article, so that comment paging further
          below does not save the article again (to be confirmed).
        -->
        <PageOnce>
          <!-- One row per article: Name, PageUrl, Title, Date, contents. -->
          <FileSave FileName="data/BlogData.tsv" FileSaveMode="Append" Stock="False">
            <GetTsvValue ColumnName="Name">
              <DataWrite Attribute="Name"/>
            </GetTsvValue>
            <GetPageUrl UrlType="Start">
              <DataWrite Attribute="PageUrl"/>
            </GetPageUrl>
            <Tags Tag="div" ClassName="entry">
              <Tags Tag="h3" ClassName="title">
                <Tags.Query>
                  <QueryFirst/>
                </Tags.Query>
                <RemoveHtmlComment>
                  <Trim DoChopReturnCode="True">
                    <DataWrite Attribute="Title"/>
                  </Trim>
                </RemoveHtmlComment>
              </Tags>
              <Tags Tag="span" ClassName="date">
                <Trim DoChopReturnCode="True">
                  <DataWrite Attribute="Date"/>
                </Trim>
              </Tags>
              <!--
                The article text sits between the google_ad_section_start and
                google_ad_section_end HTML comments. The clip patterns hold only
                the opening part of each marker, so the appended footer
                presumably closes the dangling end marker before
                RemoveHtmlComment strips the markers (to be confirmed).
                Markup characters in the patterns are escaped to keep the
                attribute values well-formed; the parsed strings are unchanged.
              -->
              <ClipText StartClip="&lt;!-- google_ad_section_start"
                        EndClip="&lt;!-- google_ad_section_end"
                        UseInner="False">
                <AppendText Footer="--&gt;">
                  <RemoveHtmlComment>
                    <Trim DoChopReturnCode="True">
                      <DataWrite Attribute="contents"/>
                    </Trim>
                  </RemoveHtmlComment>
                </AppendText>
              </ClipText>
            </Tags>
            <NextDataRow/>
          </FileSave>
          <!-- One row per .jpg image in the article text: Name, PageUrl, folder, image. -->
          <FileSave FileName="data/ImageData.tsv" FileSaveMode="Append" Stock="False">
            <ClipText StartClip="&lt;!-- google_ad_section_start"
                      EndClip="&lt;!-- google_ad_section_end"
                      UseInner="False">
              <AppendText Footer="--&gt;">
                <ImageLinks UrlFilter=".jpg">
                  <GetTsvValue ColumnName="Name">
                    <DataWrite Attribute="Name"/>
                  </GetTsvValue>
                  <GetPageUrl UrlType="Start">
                    <DataWrite Attribute="PageUrl"/>
                  </GetPageUrl>
                  <DownLoad>
                    <!-- Target folder name: "data/" + Name + "image". -->
                    <DownLoad.FolderNameTree>
                      <GetTsvValue ColumnName="Name">
                        <AppendText Header="data/" Footer="image"/>
                      </GetTsvValue>
                    </DownLoad.FolderNameTree>
                    <!-- The same folder name is written to the TSV row. -->
                    <GetTsvValue ColumnName="Name">
                      <AppendText Header="data/" Footer="image">
                        <DataWrite Attribute="folder"/>
                      </AppendText>
                    </GetTsvValue>
                    <GetFileName>
                      <DataWrite Attribute="image"/>
                    </GetFileName>
                  </DownLoad>
                  <NextDataRow/>
                </ImageLinks>
              </AppendText>
            </ClipText>
          </FileSave>
        </PageOnce>
        <!-- One row per comment: Name, Url, Date, Title, Content, Author, AuthorUrl. -->
        <FileSave FileSaveMode="Append" FileName="data/CommentData.tsv" Stock="False">
          <ClipText StartClip="&lt;div class=&quot;each_comment&quot;&gt;"
                    EndClip="&lt;!--//.each_comment--&gt;">
            <GetTsvValue ColumnName="Name">
              <DataWrite Attribute="Name"/>
            </GetTsvValue>
            <GetPageUrl UrlType="Start">
              <DataWrite Attribute="Url"/>
            </GetPageUrl>
            <ClipText StartClip="&lt;span class=&quot;comment_date&quot;&gt;"
                      EndClip="&lt;span class=&quot;comment_gotoform&quot;&gt;">
              <TagClear>
                <Trim DoChopReturnCode="True">
                  <DataWrite Attribute="Date"/>
                </Trim>
              </TagClear>
            </ClipText>
            <Tags Tag="p" ClassName="label">
              <Trim DoChopReturnCode="True">
                <DataWrite Attribute="Title"/>
              </Trim>
            </Tags>
            <ClipText StartClip="&lt;p class=&quot;comment_body&quot;&gt;"
                      EndClip="&lt;div class=&quot;comment_footer&quot;&gt;">
              <TagClear>
                <Trim DoChopReturnCode="True">
                  <DataWrite Attribute="Content"/>
                </Trim>
              </TagClear>
            </ClipText>
            <Tags Tag="span" ClassName="comment_author">
              <TagClear>
                <DataWrite Attribute="Author"/>
              </TagClear>
              <!-- "VisbleType" is spelled as in the original script; do not correct it here. -->
              <Links VisbleType="Url">
                <DataWrite Attribute="AuthorUrl"/>
              </Links>
            </Tags>
            <NextDataRow/>
          </ClipText>
        </FileSave>
        <!-- Comment paging: follows the "nextPage" link with SleepSecondTime="1". -->
        <Tags Tag="div" ClassName="page commentPaging">
          <Tags.Query>
            <QueryDistinct/>
          </Tags.Query>
          <Links TagFilter="nextPage" IsSingle="True">
            <NextPage SleepSecondTime="1"/>
          </Links>
        </Tags>
      </Page>
    </Iterator>
  </TsvReadLines>
</Data>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment