Last active
May 14, 2018 07:23
-
-
Save miracle777/9bca56cfc4f63006f54b36be6e855d1d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// URL loading test: download a user's page, follow its "next page" links and
// collect the post links found on every page, then print the collected list.

// --- Markers used to locate data inside the downloaded HTML -----------------
$check_html = "class=\"copy_link_to_post\"";        // marks the start of a post link
$check_html_end = ">投稿へのリンクをコピー</button>";  // text that follows a post link
$check_html_next = "class=\"next-link\"";           // marks the link to the next page
$check_html_next_end = "\"></a></div>";             // closes the next-page link element

// --- State shared with read_html() and ok_data() through `global` -----------
$html_br = "<br>";        // line break used for all progress output
$ok_html_data = array();  // finished list of post links
$ok_count = 0;            // next free index in $ok_html_data
$next_page_url = null;    // URL of the next page; written by read_html()
$loop_count = 0;          // pages read so far; read_html() resets it to 0 on the last page
$next_point = 0;          // length of the previous next-page URL, tracked by read_html()

// --- Crawl -------------------------------------------------------------------
$urldata = "https://valu.is/vipmasaru21";  // first page to read
read_html($urldata);

// Keep reading for as long as read_html() reports that another page exists.
while ($loop_count >= 1) {
    echo $loop_count;
    echo "ページを読み込みました";
    echo $html_br;
    // Pause between requests. This call used to sit after `exit;`, where it
    // could never run, so consecutive pages were fetched without any delay.
    sleep(10);
    read_html($next_page_url);
}

// Reading finished: print the completed data.
ok_data();
exit;
/**
 * Download one page, collect the post links it contains and find the next page.
 *
 * All state is exchanged through globals:
 *  - reads the marker strings $check_html, $check_html_end, $check_html_next,
 *    $check_html_next_end and the line break $html_br;
 *  - appends every collected link to $ok_html_data and advances $ok_count;
 *  - increments $loop_count, or resets it to 0 when no next-page link exists,
 *    which is the signal the caller uses to stop paging;
 *  - stores the next page's URL in $next_page_url and remembers its length
 *    in $next_point.
 * Progress is echoed as HTML while the page is processed.
 *
 * @param string $urldata URL (or path) of the page to read.
 * @return string|null URL of the next page, or null when this was the last page.
 */
function read_html($urldata){
    global $loop_count,$check_html,$html_br,$check_html_next_end,$check_html_next,$check_html_end,$ok_count,$ok_html_data,$next_page_url,$next_point;

    $html_base = file_get_contents($urldata);
    if ($html_base === false) {
        // A failed download is handled as an empty page: nothing is collected
        // and the missing next-page marker below ends the paging.
        $html_base = '';
    }
    $html_length = strlen($html_base);
    $loop_count = $loop_count + 1;

    // Number of times the post-link marker occurs.
    $check_count = substr_count($html_base, $check_html);
    echo "出現回数 ";
    echo $check_count;
    echo $html_br;

    // Extraction: the link starts 37 bytes after the start of the marker.
    // NOTE(review): 37 looks like the 25-byte default marker plus 12 bytes of
    // attribute prefix in front of the link - confirm against the page markup.
    $contents_html = array();
    $cursor = 0; // where the search for the next marker starts
    for ($count = 0; $count < $check_count; $count++) {
        $marker = strpos($html_base, $check_html, $cursor);
        if ($marker === false) {
            break;
        }
        $start = $marker + 37;
        $cursor = $start + 1;
        if ($cursor > $html_length) {
            break; // marker sits at the very end; an out-of-range offset would throw
        }
        $point_end = strpos($html_base, $check_html_end, $cursor);
        if ($point_end === false) {
            break; // no closing text ahead, so no later marker can be completed either
        }
        // The byte directly in front of the closing text is left out as well.
        $strlen = ($point_end - 1) - $start;
        echo $html_br;
        echo count($contents_html) + 1; // running number of the extracted item
        echo "個のデータ";
        echo $html_br;
        $item = substr($html_base, $start, $strlen);
        $contents_html[] = $item;
        echo $item;
    }

    echo $html_br;
    echo "データ整理 重複データを削除";
    echo $html_br;
    // Drop an item when it repeats the one extracted just before it and keep
    // everything else. The earlier fixed-pair walk lost the last item of an
    // odd-sized list and the second item of any pair that did not match.
    foreach ($contents_html as $index => $item) {
        if ($index > 0 && $item === $contents_html[$index - 1]) {
            echo "消したデータ";
            echo $index;
            echo "番目";
            echo $html_br;
            echo $item;
            echo $html_br;
            continue;
        }
        $ok_html_data[$ok_count] = $item; // store finished data
        $ok_count++;
    }

    // Locate the next page. The URL starts 27 bytes after the start of the
    // marker and runs up to the closing text.
    // NOTE(review): 27 looks like the 17-byte default marker plus the 10 bytes
    // of `><a href="` - confirm against the page markup.
    $next_ctr = strpos($html_base, $check_html_next);
    $next_end = ($next_ctr === false)
        ? false
        : strpos($html_base, $check_html_next_end, $next_ctr);
    if ($next_end === false || $next_end <= $next_ctr + 27) {
        // Reached the last page (or the link is incomplete or empty).
        $loop_count = 0;
        return null;
    }
    $strlen = $next_end - ($next_ctr + 28); // URL length minus one, as echoed below
    $next_page_url = substr($html_base, $next_ctr + 27, $strlen + 1);

    if ($next_point === 0) {
        $next_point = $strlen; // remember the length seen on the first page
    }
    // URL length check: when the length differs from the previous page's URL,
    // cut the URL after "max_id=" plus the 8 bytes that follow it. The position
    // is looked up inside the URL itself; it used to be taken from the whole
    // page, which lies past the end of the URL and only appended text to it.
    if ($strlen !== $next_point) {
        $max_id_at = strpos($next_page_url, "?max_id=");
        if ($max_id_at !== false) {
            $next_page_url = substr($next_page_url, 0, $max_id_at + 1 + 15);
        }
    }

    echo $html_br;
    echo $strlen;
    echo $html_br;
    echo "NEXT URL=";
    echo $next_page_url;
    echo $html_br;
    $next_point = $strlen; // remember this length for the next page
    return $next_page_url;
}
/**
 * Print the finished data set as HTML.
 *
 * Reads the globals $ok_html_data (collected entries, indexed from 0) and
 * $html_br (line break markup). Echoes a separator heading, the number of
 * entries, and then each entry under its own heading with a 1-based position.
 */
function ok_data(){
    global $html_br, $ok_html_data;

    // Summary: how many entries were collected.
    $total = count($ok_html_data);
    echo "<h1> ----------------------------- </h1>", "配列要素数", $total, $html_br;

    // One section per entry, in index order.
    $position = 0;
    while ($position < $total) {
        $label = $position + 1;
        echo "<h1> ----------------------------- </h1>", $label, "番目", $html_br;
        echo $ok_html_data[$position], $html_br;
        $position++;
    }
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment