Skip to content

Instantly share code, notes, and snippets.

@miracle777
Last active May 14, 2018 07:23
Show Gist options
  • Save miracle777/9bca56cfc4f63006f54b36be6e855d1d to your computer and use it in GitHub Desktop.
Save miracle777/9bca56cfc4f63006f54b36be6e855d1d to your computer and use it in GitHub Desktop.
<?php
// URL fetch test: walk a paginated profile page, collect every extracted
// entry, then print the collected list.
//
// The variables below are shared with read_html() and ok_data() through
// `global`, so their names must not change.
$ok_html_data = array(); // finished entries, filled by read_html()
$html_br = "<br>"; // line break used for all progress output
$next_page_url = null; // URL of the next page to fetch, set by read_html()
$ok_count = 0; // next free index in $ok_html_data
// Marker strings used to locate the data inside the fetched HTML.
$check_html = "class=\"copy_link_to_post\"" ;
$check_html_end=">投稿へのリンクをコピー</button>";
$check_html_next ="class=\"next-link\""; // marks the link to the next page
$check_html_next_end = "\"></a></div>"; // closes the next-page link element
// Number of pages read so far; read_html() resets it to 0 on the last page,
// which is what ends the loop below.
$loop_count = 0;
// Length of the previous next-page URL, tracked by read_html().
$next_point = 0;
// First page to read.
$urldata ="https://valu.is/vipmasaru21";
// URLs already fetched, so a page that links back to an earlier one cannot
// keep the crawl running forever.
$fetched_urls = array($urldata => true);
read_html($urldata);
// Keep following the next-page link until read_html() reports the last page.
while ($loop_count >= 1 ){
echo $loop_count;
echo "ページを読み込みました";
echo $html_br;
if (!is_string($next_page_url) || isset($fetched_urls[$next_page_url])) {
// No usable next URL, or one that was already visited: stop crawling.
break;
}
$fetched_urls[$next_page_url] = true;
// Pause between requests. The original script had this call after `exit`,
// where it could never run.
sleep(10);
read_html($next_page_url);
}
// All pages read: print the collected data.
ok_data();
exit;
/**
 * Fetches one page, extracts every marked entry from it and finds the URL of
 * the following page.
 *
 * Works entirely through globals shared with the calling script:
 *  - reads  $check_html / $check_html_end           (entry start / end markers)
 *  - reads  $check_html_next / $check_html_next_end (next-link start / end markers)
 *  - reads  $html_br                                (line break for progress output)
 *  - appends extracted entries to $ok_html_data and advances $ok_count
 *  - increments $loop_count, or resets it to 0 when there is no next page
 *  - writes $next_page_url and $next_point (length of the last next-page URL)
 *
 * Progress is echoed as HTML while the page is processed.
 *
 * @param string $urldata URL (or path) of the page to read.
 * @return string|null URL of the next page, or null when this was the last page.
 */
function read_html($urldata){
global $loop_count,$check_html,$html_br,$check_html_next_end,$check_html_next,$check_html_end,$ok_count,$ok_html_data,$next_page_url,$next_point;
// Bytes from the start of $check_html to the first byte of the wanted value.
// NOTE(review): 37 is tied to the exact markup of the scraped site; presumably
// it skips the marker plus an attribute prefix. Confirm against a live page.
$value_offset = 37;
// Bytes from the start of $check_html_next to the first byte of the URL
// (with the configured marker this is `class="next-link"><a href="`).
$href_offset = 27;
// Characters kept after the "?" when a next-page URL is repaired:
// "max_id=" plus 8 more characters.
$query_keep = 15;

$html_base = file_get_contents( $urldata );
if ($html_base === false) {
// A failed fetch is handled as an empty page: nothing is extracted and the
// missing next-link marker below ends the crawl.
$html_base = '';
}
$html_len = strlen($html_base);
$loop_count = $loop_count +1;

// Number of times the entry marker occurs.
$check_count = substr_count( $html_base, $check_html );
echo "出現回数 ";
echo $check_count;
echo $html_br;

// Extraction: walk the markers from left to right.
$contents_html = array();
$search_from = 0;
while ($search_from <= $html_len) {
$hit = strpos($html_base, $check_html, $search_from);
if ($hit === false) {
break;
}
$start = $hit + $value_offset;
if ($start + 1 > $html_len) {
// Marker too close to the end of the page to carry a value; searching
// past the end would raise a ValueError on PHP 8.
break;
}
$point_end = strpos($html_base, $check_html_end, $start + 1);
if ($point_end === false) {
// No closing marker from here on, so no further complete entry exists.
break;
}
// The value stops one byte before the closing marker.
$entry = substr($html_base, $start, ($point_end - 1) - $start);
$contents_html[] = $entry;
echo $html_br;
echo count($contents_html); // running number of entries
echo "個のデータ";
echo $html_br;
echo $entry;
$search_from = $start + 1;
}

echo $html_br;
echo "データ整理 重複データを削除";
echo $html_br;
// Each kept entry absorbs at most one identical entry that directly follows
// it. For a page where every entry appears twice in a row this matches the
// old pairwise pass, but an unpaired trailing entry and the second member of
// a non-identical pair are no longer lost.
$previous = null;
$can_absorb = false;
foreach ($contents_html as $index => $entry) {
if ($can_absorb && $entry === $previous) {
echo "消したデータ";
echo $index;
echo "番目";
echo $html_br;
echo $entry;
echo $html_br;
$can_absorb = false;
continue;
}
$ok_html_data[$ok_count] = $entry; // store the finished entry
$ok_count++;
$previous = $entry;
$can_absorb = true;
}

// Locate the link to the next page.
$next_ctr = strpos($html_base,$check_html_next);
if ($next_ctr === false ){
// Reached the last page.
$loop_count = 0;
return;
}
$url_start = $next_ctr + $href_offset;
$next_end = ($url_start <= $html_len)
? strpos($html_base, $check_html_next_end, $url_start)
: false;
if ($next_end === false || $next_end === $url_start) {
// Link element is cut off or carries an empty URL: nothing to follow.
$loop_count = 0;
return;
}
$next_page_url = substr($html_base, $url_start, $next_end - $url_start);
// Kept as "URL length minus one", the value the script has always printed
// and compared.
$url_span = strlen($next_page_url) - 1;
if ($next_point == 0 ){
$next_point = $url_span; // remember the length seen on the first page
}
// URL length check: a length that differs from the previous page is treated
// as a malformed URL.
// NOTE(review): the old code mixed an offset into the page source with an
// offset into the URL and ended up appending a second "max_id=..." to the
// URL. The intent appears to be trimming everything after "?max_id=" plus 8
// characters; confirm with the site's real URLs.
if ($url_span != $next_point){
$query_at = strpos($next_page_url, "?max_id=");
if ($query_at !== false) {
$next_page_url = substr($next_page_url, 0, $query_at + 1 + $query_keep);
}
}
echo $html_br;
echo $url_span;
echo $html_br;
echo "NEXT URL=";
echo $next_page_url;
echo $html_br;
$next_point = $url_span; // remember this page's length for the next call
return($next_page_url);
}
/**
 * Prints the collected result list as HTML.
 *
 * Reads the globals $ok_html_data (the collected entries) and $html_br (the
 * line-break string). Output is a divider heading, the entry count, and then
 * one divider, a 1-based position label and the entry itself per element.
 */
function ok_data(){
global $ok_html_data,$html_br;

$divider = "<h1> ----------------------------- </h1>";
$total = count($ok_html_data); // number of collected entries

echo $divider, "配列要素数", $total, $html_br;

// Entries are read by numeric index 0..$total-1.
$position = 0;
while ($position < $total) {
echo $divider, $position + 1, "番目", $html_br;
echo $ok_html_data[$position], $html_br;
$position++;
}
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment