Skip to content

Instantly share code, notes, and snippets.

@misodengaku
Created November 15, 2012 16:03
Show Gist options
  • Save misodengaku/4079391 to your computer and use it in GitHub Desktop.
Save misodengaku/4079391 to your computer and use it in GitHub Desktop.
トトリちゃん画像収集用のアレ
using System;
using System.Collections.Generic;
using System.IO;
using System.Net;
using System.Text;
using System.Threading.Tasks;
namespace TotoriDownloader
{
/// <summary>
/// Console tool that walks Google image-search result pages for a fixed query
/// and downloads every linked image into <see cref="savePath"/>.
/// </summary>
class Program
{
    // Destination directory for the downloaded images.
    static string savePath = @"i:\totori_cs2\";

    // Alternate query that excludes two unrelated terms, kept for reference:
    //"https://www.google.co.jp/search?q=%E3%83%88%E3%83%88%E3%83%AA+-%E7%84%BC%E8%82%89+-%E5%88%91%E4%BA%8B&hl=ja&safe=off&sout=1&biw=1920&tbm=isch&sa=N"

    // Search URL; the result offset is appended after "start=".
    const string SearchUrl = "https://www.google.co.jp/search?q=%E3%83%88%E3%83%88%E3%83%AA&hl=ja&safe=off&sout=1&biw=1920&tbm=isch&sa=N&start=";

    // Number of result pages requested by Main.
    const int PageCount = 100;

    // Offset step between consecutive result pages.
    const int ResultsPerPage = 20;

    // Markers that surround each image URL inside the result page HTML.
    static readonly string[] LinkDelimiters =
    {
        "http://www.google.co.jp/imgres?imgurl=",
        "&amp;imgrefurl="
    };

    // File-name suffixes accepted as images (case-sensitive, as before).
    static readonly string[] ImageExtensions = { "jpg", "png", "bmp", "gif" };

    /// <summary>
    /// Entry point: fetches all result pages in parallel and logs start/end times.
    /// </summary>
    /// <param name="args">Unused command-line arguments.</param>
    static void Main(string[] args)
    {
        Console.WriteLine("Start: " + DateTime.Now.ToString());

        // Without the target directory every DownloadFile call would fail and be
        // silently counted as an error, so make sure it exists up front.
        Directory.CreateDirectory(savePath);

        Parallel.For(0, PageCount, GetTotori);

        Console.WriteLine("Completed!: " + DateTime.Now.ToString());
    }

    /// <summary>
    /// Downloads one search result page, extracts the image URLs it links to and
    /// saves each image into <see cref="savePath"/>. Individual download failures
    /// are counted, not propagated.
    /// </summary>
    /// <param name="_page">Zero-based index of the result page to process.</param>
    static void GetTotori(int _page)
    {
        var page = _page * ResultsPerPage;
        int done = 0, fail = 0;
        Console.WriteLine("Page " + _page + " GET Start");

        byte[] data;
        try
        {
            using (var wc = new WebClient())
            {
                data = wc.DownloadData(SearchUrl + page);
            }
        }
        catch (WebException e)
        {
            // One unreachable page must not tear down the whole Parallel.For
            // (an escaping exception would abort every other page as well).
            Console.WriteLine("Page " + _page + " GET Failed: " + e.Message);
            return;
        }

        Encoding enc = Encoding.GetEncoding("Shift_JIS");
        string html = enc.GetString(data);
        var links = html.Split(LinkDelimiters, StringSplitOptions.RemoveEmptyEntries);

        List<string> totori = new List<string>();
        foreach (var item in links)
        {
            if (item.StartsWith("http", StringComparison.Ordinal) && HasImageExtension(item))
            {
                totori.Add(item);
            }
        }

        Parallel.ForEach(totori, (u) =>
        {
            try
            {
                var url = new Uri(u);
                var filePath = Path.Combine(savePath, Path.GetFileName(url.LocalPath));
                using (var client = new WebClient())
                {
                    client.DownloadFile(url, filePath);
                }
                // The counters are shared by all worker threads; a plain ++ loses updates.
                System.Threading.Interlocked.Increment(ref done);
            }
            catch (Exception)
            {
                // Best effort: a broken link or unwritable file only bumps the error count.
                System.Threading.Interlocked.Increment(ref fail);
            }
        });

        Console.WriteLine("Page " + _page + " End");
        Console.WriteLine("Done: " + done + " Error: " + fail);
    }

    // Returns true when the candidate URL ends with one of the accepted image suffixes.
    static bool HasImageExtension(string candidate)
    {
        foreach (var ext in ImageExtensions)
        {
            if (candidate.EndsWith(ext, StringComparison.Ordinal))
            {
                return true;
            }
        }
        return false;
    }
}
}
@misodengaku
Copy link
Author

細かいことは知らぬ

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment