Last active
January 11, 2016 19:32
-
-
Save imasusen/82ecd4ec792fece9ec37 to your computer and use it in GitHub Desktop.
はてなフォトライフの画像を一括ダウンロードするやつ
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The parameter is the "hogehoge" part of "f.hatena.ne.jp/hogehoge".
Param(
    [Parameter(Mandatory = $true)]
    [string] $hatenaId
)

# Entry point: hand the Hatena ID to main.
# NOTE(review): main and its helpers are defined further down in this file.
# PowerShell only knows a function once its definition has executed, so in a
# fresh session this call fails; it should be moved below the last function.
main -hatenaId $hatenaId
# Downloads every image of a Hatena Fotolife account into a folder on the
# desktop.
#
# Parameters:
#   $hatenaId - the account name, i.e. the "hogehoge" in f.hatena.ne.jp/hogehoge
#
# Output:
#   One "<save folder>: <image count>" string per Fotolife folder.
#
# Relies on the sibling helpers getFotolifeTitle, getFotolifeFolderNames,
# getImageUrls and downloadImages defined in this file.
function main($hatenaId) {
    $fotolifeUrl = 'f.hatena.ne.jp/' + $hatenaId
    $res = Invoke-WebRequest $fotolifeUrl

    # Create the destination folder on the desktop, named after the page title.
    # -Force keeps a re-run from failing when the folder already exists.
    $desktop = [Environment]::GetFolderPath([Environment+SpecialFolder]::Desktop)
    $fotolifeTitle = getFotolifeTitle -res $res
    $titleFolder = Join-Path $desktop $fotolifeTitle
    New-Item $titleFolder -ItemType Directory -Force | Out-Null

    $folderNames = getFotolifeFolderNames -res $res
    foreach ($folderName in $folderNames) {
        $saveFolder = $titleFolder
        if ($folderName -eq 'トップフォルダ') {
            # The top folder's images go directly into the title folder.
            $fotolifeRssUrl = $fotolifeUrl + '/rss?sort=old'
        } else {
            $saveFolder = Join-Path $titleFolder $folderName
            New-Item $saveFolder -ItemType Directory -Force | Out-Null
            # Escape the folder name so spaces or reserved characters cannot
            # break the URL.
            $fotolifeRssUrl = $fotolifeUrl + '/' + [Uri]::EscapeDataString($folderName) + '/rss?sort=old'
        }

        # Walk the folder's RSS pages until one contains no images.
        $count = 0
        for ($page = 1; ; $page++) {
            $rss = Invoke-WebRequest ($fotolifeRssUrl + '&page=' + $page)

            # @() forces an array: a single URL would otherwise come back as a
            # bare string, whose Length is its character count.
            $imageUrls = @(getImageUrls -res $rss)
            if ($imageUrls.Count -eq 0) {
                # No more images: report the total and finish this folder.
                $saveFolder + ': ' + $count
                break
            }

            downloadImages -imageUrls $imageUrls -savePath $saveFolder
            $count += $imageUrls.Count
        }
    }
}
# Returns the title of the HTML document parsed from the given web response.
#
# Parameters:
#   $res - a response object exposing ParsedHtml (as Invoke-WebRequest returns)
function getFotolifeTitle($res) {
    $document = $res.ParsedHtml
    return $document.title
}
# Returns the folder names listed in the page's sidebar, i.e. the text of
# every <a> inside <ul id="sidebar-folder-list">.
#
# Parameters:
#   $res - the already-fetched Fotolife top page (Invoke-WebRequest result).
#          When omitted, the page is fetched using $hatenaId from the
#          caller's scope, as this function always did before.
#
# Output:
#   Zero or more folder name strings.
function getFotolifeFolderNames($res) {
    if ($null -eq $res) {
        # Backward-compatible fallback for callers that pass no response.
        $res = Invoke-WebRequest ('f.hatena.ne.jp/' + $hatenaId)
    }

    $folderLists = @($res.ParsedHtml.body.getElementsByTagName('ul') |
        Where-Object {
            # Lists without an id attribute may yield no attribute node.
            $idNode = $_.getAttributeNode('id')
            $idNode -and $idNode.Value -eq 'sidebar-folder-list'
        })

    # Emitting straight to the pipeline avoids rebuilding an array with +=,
    # and an absent sidebar list simply produces no output.
    foreach ($folderList in $folderLists) {
        foreach ($anchor in $folderList.getElementsByTagName('a')) {
            $anchor.outerText
        }
    }
}
# Returns the src URLs of the images in a web response, leaving out the
# small "_m" variants.
#
# Parameters:
#   $res - a response object exposing an Images collection whose items have
#          a src property (as Invoke-WebRequest returns)
#
# Output:
#   Zero or more URL strings. URLs that do not match the Fotolife file name
#   pattern at all are passed through unchanged, as before.
#
# Reads $hatenaId from the caller's scope to build the pattern.
function getImageUrls($res) {
    # Build the regex once; escape the ID so it is always matched literally.
    $pattern = [regex]::Escape([string]$hatenaId) +
        '/([0-9]+)/(?<fileName>([0-9]+(?<size>(|_m|_original)))\.(jpeg|jpg|png|gif|bmp))'
    $sizeRegex = [regex]$pattern

    foreach ($image in $res.Images) {
        $imageUrl = $image.src
        if ([string]::IsNullOrEmpty($imageUrl)) {
            # Nothing to download for an image without a src.
            continue
        }

        # Skip the small images. Checking Success avoids indexing into an
        # empty match collection, and the local name avoids overwriting the
        # automatic $Matches variable.
        $hit = $sizeRegex.Match($imageUrl)
        if ($hit.Success -and $hit.Groups['size'].Value -eq '_m') {
            continue
        }

        $imageUrl
    }
}
# Downloads one URL into a folder, naming the file after the last path
# segment of the URL.
#
# Parameters:
#   $url      - the address to download
#   $savePath - the existing folder the file is written to
function download($url, $savePath) {
    # Drop any query string or fragment so the file name stays valid.
    $pathPart = ($url -split '[?#]', 2)[0]
    $fileName = ($pathPart -split '/')[-1]

    if ([string]::IsNullOrEmpty($fileName)) {
        Write-Warning ('Skipping URL without a file name: ' + $url)
        return
    }

    Invoke-WebRequest $url -OutFile (Join-Path $savePath $fileName)
}
# Downloads each of the given image URLs into the given folder by calling
# download once per URL; does nothing when there are no URLs.
#
# Parameters:
#   $imageUrls - the URLs to download
#   $savePath  - the folder passed on to download
function downloadImages($imageUrls, $savePath) {
    if ($imageUrls.Length -ne 0) {
        $imageUrls | ForEach-Object {
            download -url $_ -savePath $savePath
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment