Skip to content

Instantly share code, notes, and snippets.

@amane-katagiri
Created May 27, 2017 06:16
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save amane-katagiri/7dce3d9627c1cb53583b1d5d32efd02b to your computer and use it in GitHub Desktop.
Save amane-katagiri/7dce3d9627c1cb53583b1d5d32efd02b to your computer and use it in GitHub Desktop.
pixiv scraping tool
#!/usr/bin/fish
# init_cookie COOKIE_FILE [PIXIV_ID [PASSWORD]]
#
# Logs in to pixiv and stores the resulting cookies in COOKIE_FILE.
#
# The login page is downloaded first; the names and values of its hidden
# form inputs are extracted with xmllint/sed and posted back together with
# the "pixiv_id" and "password" fields. PIXIV_ID and PASSWORD are read
# interactively when they are not given as arguments.
#
# Returns 1 (without touching anything) when COOKIE_FILE is missing;
# otherwise the status of the final cleanup command.
#
# NOTE(review): the interactive password prompt echoes what is typed.
# Newer fish releases offer `read --silent`; confirm the minimum fish
# version this script must support before switching to it.
function init_cookie
    # Validate arguments before creating any temp file, so the early
    # return cannot leave stray files behind.
    if [ (count $argv) -lt 1 ]
        return 1
    end

    # `set -l` at function level keeps these out of the global scope.
    set -l cookie_file $argv[1]
    set -l login_url "https://accounts.pixiv.net/login"
    set -l page_file (mktemp)
    set -l name_file (mktemp)
    set -l value_file (mktemp)

    # Download the login form; this also creates/updates the cookie jar.
    curl -c $cookie_file $login_url > $page_file

    # One hidden-input name per line. nkf -MQ emits quoted-printable and
    # tr turns its "=" escape marker into "%" (presumably to approximate
    # percent-encoding of the form data).
    xmllint --xpath '//form//input[@type="hidden"]/@name' --html $page_file | \
        sed -e 's/ name=\("\([^"]*\)"\|\'\([^\']*\)\'\)/\2\3\n/g' | \
        nkf -WwMQ | tr "=" "%" > $name_file

    # The matching hidden-input values, in the same order as the names.
    xmllint --xpath '//form//input[@type="hidden"]/@value' --html $page_file | \
        sed -e 's/ value=\("\([^"]*\)"\|\'\([^\']*\)\'\)/\2\3\n/g' | \
        nkf -WwMQ | tr "=" "%" > $value_file

    # Append the credential fields after the hidden ones.
    echo "pixiv_id" >> $name_file
    echo "password" >> $name_file

    # printf is used instead of echo so that a value such as "-n" is
    # written literally rather than being parsed as an echo option.
    if [ (count $argv) -ge 2 ]
        printf '%s\n' $argv[2] >> $value_file
    else
        read -l -p "set_color green; echo -n pixiv_id >&2; set_color normal; echo '> ' >&2" PIXIV_ID
        printf '%s\n' $PIXIV_ID >> $value_file
    end

    if [ (count $argv) -ge 3 ]
        printf '%s\n' $argv[3] >> $value_file
    else
        read -l -p "set_color green; echo -n password >&2; set_color normal; echo '> ' >&2" PASSWORD
        printf '%s\n' $PASSWORD >> $value_file
        # Drop the password from the shell as soon as it is written out.
        set -e PASSWORD
    end

    # Join names and values into "name=value&..." and feed the result to
    # curl on stdin (--data @-), so the password never appears in curl's
    # argument list.
    paste -d "=" $name_file $value_file | tr "\n" "&" | \
        curl -b $cookie_file -c $cookie_file --data @- $login_url

    # -u makes shred remove the files after overwriting them; without it
    # the overwritten temp files would be left on disk.
    shred -u $page_file $name_file $value_file
end
# Main script: prints the first 10 entries of pixiv's weekly R-18 novel
# ranking as a Markdown table (ID, novel URL, author name).
#
# Usage: SCRIPT [PIXIV_ID [PASSWORD [COOKIE_FILE]]]
#   PIXIV_ID, PASSWORD  login credentials; init_cookie prompts for any
#                       that are missing
#   COOKIE_FILE         cookie jar to reuse and keep; when omitted a
#                       temporary jar is used and destroyed at the end
if [ (count $argv) -ge 1 ]
    set PIXIV_ID $argv[1]
end
if [ (count $argv) -ge 2 ]
    set PASSWORD $argv[2]
end
if [ (count $argv) -ge 3 ]
    set COOKIE $argv[3]
else
    set COOKIE (mktemp)
end

# Log in only when the cookie jar is missing or empty. Unset variables
# expand to nothing in fish, so init_cookie receives just the arguments
# that were actually supplied.
if [ ! -s $COOKIE ]
    init_cookie $COOKIE $PIXIV_ID $PASSWORD
end

# Download the ranking page using the stored session cookies.
set RANKING (mktemp)
curl -L -b $COOKIE "https://www.pixiv.net/novel/ranking.php?mode=weekly_r18" > $RANKING

# Column 1: row numbers 1..10.
set IDS (mktemp)
seq 1 10 > $IDS

# Column 2: absolute URLs of the first 10 novels.
# xmllint's stderr output is discarded with `2>`; the caret form
# (`^/dev/null`) is not recognised by current fish releases.
set URLS (mktemp)
xmllint --xpath '//div[@class="novel-right-contents"]//h1[@class="title"]/a/@href' --html $RANKING 2>/dev/null | \
    sed -e 's/ href=\("\([^"]*\)"\|\'\([^\']*\)\'\)/\2\3\n/g' | sed -e "s#^#http://www.pixiv.net#g" | head -n10 > $URLS

# Column 3: author names of the first 10 novels.
set NAMES (mktemp)
xmllint --xpath '//div[@class="novel-right-contents"]//li[@class="author"]/a/@data-user_name' --html $RANKING 2>/dev/null | \
    sed -e 's/ data-user_name=\("\([^"]*\)"\|\'\([^\']*\)\'\)/\2\3\n/g' | head -n10 > $NAMES

# Emit the Markdown table: header, alignment row, then one row per novel.
echo '| ID | 小説のURL | 作者名 |'
echo '|:- |:- |:- |'
paste -d '|' $IDS $URLS $NAMES | sed -e 's/|/ | /g' | sed -e 's/^/| /g' | sed -e 's/$/ |/g'

rm $RANKING $NAMES $URLS $IDS

if [ (count $argv) -ge 3 ]
    # The caller supplied the cookie file, so keep it for later runs.
    echo "cookie is saved to "$COOKIE >&2
else
    # Temporary cookie jar: overwrite it and, with -u, also remove it.
    # Plain `shred` would leave the overwritten file on disk.
    shred -u $COOKIE
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment