Last active
June 27, 2017 11:38
-
-
Save konoha81/628fe147c48ff2925830 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use LWP::Simple; | |
use Crypt::SSLeay; | |
use XML::Simple; | |
use Data::Dumper; | |
# ---------------------------------------------------------------------------
# Main script.
#
# Asks the user how many recent posts to scan, an optional tag, and the
# desired ordering ([s]core or [p]reference), fetches the post metadata via
# post_fetch(), classifies it with nb(), and finally downloads the top
# images into a freshly created, date-stamped directory.
# ---------------------------------------------------------------------------

# Banner.  The first two messages were split by a backslash-newline (an
# editor line-wrap artifact) which printed a line break in the middle of the
# text; they are emitted as single lines here.
print("\n***** 最新のpostから指定枚数だけ遡り、スコア順に並び替えて画像を取得します。*****\n");
print("***** 取得したい枚数およびタグと、うち上位何件を取得したいをかを入力してください。*****\n");
print("***** control+x → control+c でキャンセルが出来ます。*****\n\n");

# Number of posts to scan.  The pattern is anchored at both ends so that
# input such as "12abc" is rejected instead of being silently accepted.
my $post = prompt_line("取得したい枚数: ");
if ($post !~ /\A[1-9][0-9]*\z/) {
    die "入力は半角の正の整数のみ可能です。\n";
}

# Optional tag filter; an empty answer means "no filter".
my $fetch_tags = prompt_line("取得したいタグ\n(指定しない場合はEnter。タグは1つのみ):");

# Disabled in the original (it was wrapped in a POD block): a prompt for the
# "top N" rank.  Kept for reference only.
#   print("上位何件: ");
#   my $rank = <STDIN>;
#   chomp($rank);
#   if($rank<=$post){
#   }else{
#       die "取得したい枚数より少ない数を入力してください。\n";
#   }

# Ordering: exactly "s" (score) or "p" (preference).
my $sorting = prompt_line("score順?好み順? --- type [s] or [p]: ");
if ($sorting !~ /\A[sp]\z/) {
    die "入力は s か p のみ可能です。\n";
}

# File-level state shared with the subs below.
# Tags and URLs, each keyed by post id (filled by post_fetch).
my %tags_hash;
my %url_hash;
# Ids of the fetched posts, in fetch order (filled by post_fetch).
my @post_id;
# "id<TAB>tags" records handed to nb().
my @id_tags;
# id => [score, url]; sorting input (filled by post_fetch).
my %xml_data;
# id => [like - dislike]; naive Bayes scores (filled by nb).
my %nb_score;
my $like_num = 0;

# The tag is now actually passed on: post_fetch() reads its second argument
# as the tag filter, but the original call never supplied it.
my ($post_tag, $post_url, $sorted_id) = post_fetch($post, $fetch_tags);
my @sort_id = @$sorted_id;

# Build one "id<TAB>tags" record per post that was really collected.  The
# original looped from 0 to $post inclusive, which ran one past the end and
# assumed that exactly $post ids had been stored.
for my $id (@post_id) {
    push(@id_tags, join("\t", $id, $post_tag->{$id}));
}

# Ids classified as "Like", ordered by preference.
my ($nb_result_id, $like_sheets) = nb(@id_tags);
my @result_id = @$nb_result_id;

print("Likeに $like_sheets 枚分類されました。\n");
my $fetch_like = prompt_line("何枚取得しますか?:");
# Validate the value just entered (the original re-checked $post here).
if ($fetch_like !~ /\A[1-9][0-9]*\z/) {
    die "入力は半角の正の整数のみ可能です。\n";
}
elsif ($fetch_like > $like_sheets) {
    die "${like_sheets}枚より少ない枚数を入力してください。\n";
}

if ($sorting eq "s") {
    # Use the ids ordered by score instead.
    @result_id = @sort_id;
}

# Date stamp (YYMMDD_) for the output directory name.
$ENV{'TZ'} = "JST-9";
my ($mday, $month, $year) = (localtime)[3, 4, 5];
my $date = sprintf("%02d%02d%02d_", $year % 100, $month + 1, $mday);

# Directory name: <tag or "All">_<date><n>, with n the first free number.
# Path separators in the tag are replaced so the name stays one component.
my $dir_tag = ($fetch_tags eq '') ? "All" : $fetch_tags;
$dir_tag =~ s{[/\\\0]}{_}g;
my $dir_base = $dir_tag . "_" . $date;

my $dir_name;
for my $n (1 .. 999) {
    my $candidate = $dir_base . $n;
    next if -e $candidate;
    mkdir($candidate) or die "フォルダを作成できません: $candidate: $!\n";
    $dir_name = $candidate;
    last;
}
die "フォルダを作成できません: $dir_base\n" unless defined $dir_name;

# Download the top $fetch_like images, naming each file after its post id.
for my $k (0 .. $fetch_like - 1) {
    my $id = $result_id[$k];
    last unless defined $id;    # fewer ids available than requested

    my $url = $post_url->{$id};
    my $image = (defined $url && $url ne '') ? get($url) : undef;
    unless (defined $image) {
        # Skip failed downloads instead of writing an empty file.
        warn "画像を取得できませんでした (id: $id)\n";
        next;
    }

    my $suffix = get_prefix($url);
    my $path   = "./$dir_name/$id$suffix";
    open(my $out, '>', $path) or die "$path を開けません: $!\n";
    binmode $out;
    print {$out} $image;
    close($out) or die "$path に書き込めません: $!\n";

    my $count = $k + 1;
    print "Acquisition of the No.$count sheet has been completed.\n";
}

# Print a prompt and return one chomped line from STDIN; dies on end of input.
sub prompt_line {
    my ($message) = @_;
    print($message);
    my $answer = <STDIN>;
    die "入力を読み取れませんでした。\n" unless defined $answer;
    chomp($answer);
    return $answer;
}
# Return the file extension of a URL, including the leading dot
# (e.g. ".jpg").
#
# Only the last path segment is inspected, and any query string or fragment
# is ignored, so a dot in the host name or in "?v=1.2" is never mistaken for
# an extension.  Returns the empty string when the URL is undefined or its
# last segment has no dot.
sub get_prefix {
    my $url = shift;
    return '' unless defined $url;

    # Drop "?query" and "#fragment".
    (my $path = $url) =~ s/[?#].*\z//s;

    # Keep only what follows the final slash (rindex gives -1 when absent,
    # so the whole string is used).
    my $file_name = substr($path, rindex($path, '/') + 1);

    my $dot = rindex($file_name, '.');
    return '' if $dot < 0;
    return substr($file_name, $dot);
}
# Fetch the newest posts, page by page, and index the ones whose tag string
# contains the requested tag.
#
# Arguments:
#   $_[0]  number of posts to scan (75 posts per page)
#   $_[1]  optional tag; only posts whose tag string contains it (literal
#          substring match) are kept.  Undefined or empty keeps every post.
#
# Side effects: fills the file-level %tags_hash, %url_hash, %xml_data and
# @post_id.
#
# Returns (\%tags_hash, \%url_hash, \@ids_sorted_by_score).
# Dies when a page cannot be downloaded.
sub post_fetch {
    my ($wanted, $tag_filter) = @_;
    $tag_filter = '' unless defined $tag_filter;

    my $per_page = 75;    # posts per page
    # Number of pages needed (ceiling of $wanted / $per_page).
    my $post_num = int(($wanted + $per_page - 1) / $per_page);

    for my $i (1 .. $post_num) {
        # Sleep one second after every second page and report progress.
        if ($i % 2 == 0) {
            sleep 1;
            my $progress = int(($i * $per_page / $wanted) * 100);
            if ($progress <= 100) {
                print("wait...$progress\% \n");
            }
        }

        # NOTE(review): the base URL is an empty string in this copy of the
        # file, so only the page number is requested -- presumably the API
        # endpoint was stripped; confirm and restore it.
        my $url = "" . "$i";
        my $res = get($url);
        die "ページ $i を取得できませんでした: $url\n" unless defined $res;

        # An empty KeyAttr list disables folding, so repeated <post>
        # elements come back as a list in document order.
        my $xml  = XML::Simple->new(keyattr => []);
        my $data = $xml->XMLin($res);

        # Normalise to an array ref: no posts -> [], a single post (which
        # XML::Simple returns as a plain hash) -> one-element list.
        my $posts = (ref $data eq 'HASH') ? $data->{'post'} : undef;
        $posts = [$posts] if ref $posts eq 'HASH';
        $posts = []       unless ref $posts eq 'ARRAY';
        last unless @$posts;    # ran out of posts; later pages are empty too

        # On the last page only the remainder is needed, and never more
        # than the page actually contains.
        my $limit = $per_page;
        if ($i == $post_num && $wanted % $per_page != 0) {
            $limit = $wanted % $per_page;
        }
        $limit = scalar(@$posts) if $limit > scalar(@$posts);

        for my $j (0 .. $limit - 1) {
            my $entry = $posts->[$j];
            next unless ref $entry eq 'HASH';

            my $id = $entry->{'id'};
            next unless defined $id;

            my $tags  = defined $entry->{'tags'} ? $entry->{'tags'} : '';
            my $score = $entry->{'score'};

            # \Q..\E makes the tag match literally.  The "\n" alternative
            # is kept from the original -- presumably it covered an
            # un-chomped empty input; TODO confirm it is still needed.
            next unless $tags =~ /\Q$tag_filter\E|\n/;

            my $thum = defined $entry->{'sample_url'} ? $entry->{'sample_url'} : '';

            # Store tags and URL keyed by id, plus the sort record.
            $tags_hash{$id} = "$tags";
            $url_hash{$id}  = "$thum";
            $xml_data{$id}  = [$score, $thum];
            push(@post_id, $id);
        }
    }

    # Ids ordered by score, highest first.
    my @sort_id = score_sort(%xml_data);
    return (\%tags_hash, \%url_hash, \@sort_id);
}
# Naive Bayes Like/Dislike classifier.
#
# Arguments: a list of "id<TAB>space separated tags" strings.
#
# Reads the per-tag probabilities from "Likeprob.txt" and "Dislikeprob.txt"
# in the current directory (one "tag<TAB>probability" pair per line).  For
# every entry it sums the log prior and the log probability of each tag for
# both classes; entries whose Like total is strictly greater are recorded in
# the file-level %nb_score as id => [like - dislike].
#
# Returns (\@ids_sorted_by_nb_score, $number_of_entries_classified_as_like).
# Dies when a probability file cannot be opened.
sub nb {
    my @entries = @_;

    my $like_prob_of    = _load_tag_probabilities("Likeprob.txt");
    my $dislike_prob_of = _load_tag_probabilities("Dislikeprob.txt");

    # Class sizes of the training data, used for the priors.
    # NOTE(review): the original comment said 147 Like and 96 Dislike
    # samples, but the code uses 140 for Dislike -- confirm which is right.
    my $like_num    = 147;
    my $dislike_num = 140;
    my $total       = $like_num + $dislike_num;

    # Probability assumed for a tag missing from a probability file.
    my $unseen_prob = 0.001;

    my $like_sheets = 0;
    for my $entry (@entries) {
        next unless defined $entry;

        my ($id, $tag_text) = split(/\t/, $entry);
        # An entry without an id cannot be downloaded later; skip it rather
        # than counting it as a Like on the strength of the prior alone.
        next unless defined $id && $id ne '';
        $tag_text = '' unless defined $tag_text;

        my $like    = log($like_num / $total);
        my $dislike = log($dislike_num / $total);

        # Every tag contributes, including the last one (the original
        # loop bound "$i < $#taglist" left it out).
        for my $tag (split(' ', $tag_text)) {
            my $p_like    = $like_prob_of->{$tag};
            my $p_dislike = $dislike_prob_of->{$tag};

            # Missing or non-positive probabilities fall back to the
            # smoothing value so log() never receives 0.
            $like    += log((defined $p_like    && $p_like > 0)    ? $p_like    : $unseen_prob);
            $dislike += log((defined $p_dislike && $p_dislike > 0) ? $p_dislike : $unseen_prob);
        }

        # Only strict Likes are kept; ties and Dislikes are dropped.
        next unless $like > $dislike;
        $nb_score{$id} = [$like - $dislike];
        $like_sheets++;
    }

    my @nb_sort_id = score_sort(%nb_score);
    return (\@nb_sort_id, $like_sheets);
}

# Read a "tag<TAB>probability" file and return a hash ref of tag => probability.
sub _load_tag_probabilities {
    my ($path) = @_;

    open(my $in, '<', $path) or die "$path: $!";
    my %prob_of;
    while (my $line = <$in>) {
        chomp $line;
        my ($tag, $prob) = split(/\t/, $line);
        next unless defined $tag && defined $prob;
        $prob_of{$tag} = $prob;
    }
    close($in);

    return \%prob_of;
}
# Order ids by score.
#
# Takes a flattened hash of id => array ref whose first element is the
# numeric score.  Returns the ids with the highest score first; ids with
# equal scores are ordered by ascending string comparison of the id.
sub score_sort {
    my %record_of = @_;

    my @ranked = sort {
        $record_of{$b}->[0] <=> $record_of{$a}->[0]
            or $a cmp $b
    } keys %record_of;

    return @ranked;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment