Skip to content

Instantly share code, notes, and snippets.

@fushihara
Created April 7, 2012 01:44
Show Gist options
  • Save fushihara/2324451 to your computer and use it in GitHub Desktop.
Save fushihara/2324451 to your computer and use it in GitHub Desktop.
internetTVGuideを解析
<?php
/**
 * Scrapes the programme listings of www.tvguide.or.jp for one area and keeps
 * them in a line-delimited JSON cache file stored next to this source file.
 *
 * Each programme is an associative array with the keys
 * chName, chId, from, to, title, info, category, programID and flag.
 * "from"/"to" are Unix timestamps built with mktime(), i.e. in PHP's default
 * time zone.
 */
class internetTVGuide{
    // Area codes; the chosen value is sent to the site as "regionId".
    const AREA_HOKKAIDO=1;
    const AREA_AOMORI=2;
    const AREA_IWATE=3;
    const AREA_MIYAGI=4;
    const AREA_AKITA=5;
    const AREA_YAMAGATA=6;
    const AREA_FUKUSHIMA=7;
    const AREA_IBARAKI=8;
    const AREA_TOCHIGI=9;
    const AREA_GUNMA=10;
    const AREA_SAITAMA=11;
    const AREA_CHIBA=12;
    const AREA_TOKYO=13;
    const AREA_KANAGAWA=14;
    const AREA_NIIGATA=15;
    const AREA_TOYAMA=16;
    const AREA_ISHIKAWA=17;
    const AREA_FUKUI=18;
    const AREA_YAMANASHI=19;
    const AREA_NAGANO=20;
    const AREA_GIFU=21;
    const AREA_SHIZUOKA=22;
    const AREA_AICHI=23;
    const AREA_MIE=24;
    const AREA_SHIGA=25;
    const AREA_KYOTO=26;
    const AREA_OSAKA=27;
    const AREA_HYOGO=28;
    const AREA_NARA=29;
    const AREA_WAKAYAMA=30;
    const AREA_TOTTORI=31;
    const AREA_SHIMANE=32;
    const AREA_OKAYAMA=33;
    const AREA_HIROSHIMA=34;
    const AREA_YAMAGUCHI=35;
    const AREA_TOKUSHIMA=36;
    const AREA_KAGAWA=37;
    const AREA_EHIME=38;
    const AREA_KOCHI=39;
    const AREA_FUKUOKA=40;
    const AREA_SAGA=41;
    const AREA_NAGASAKI=42;
    const AREA_KUMAMOTO=43;
    const AREA_OITA=44;
    const AREA_MIYAZAKI=45;
    const AREA_KAGOSHIMA=46;
    const AREA_OKINAWA=47;

    /** Icon number (from the icon image URL) => flag name. */
    private static $iconFlags=array(
        27=>"news",
        45=>"data",
        29=>"repeat",
        12=>"bilingual",
        11=>"mSound",
        17=>"mString",
        26=>"new",
        18=>"hand",
        30=>"final",
        10=>"stereo",
        13=>"bMode",
        20=>"free",
        21=>"mono",
        28=>"weather",
        41=>"digital",
        42=>"1ch",
        43=>"2ch",
        44=>"3ch",
        46=>"2way",
        47=>"multi",
        48=>"surround",
        49=>"live",
        50=>"cc",
        51=>"oc",
        52=>"dub",
        53=>"ppv",
        54=>"ppv",
    );

    /** Lower-case cell background colour => category name. */
    private static $colorCategories=array(
        "#"=>"none",
        "#ffffcc"=>"drama",
        "#ffcccc"=>"movie",
        "#ccffcc"=>"music",
        "#ccccff"=>"sport",
        "#ffcc99"=>"cook",
        "#ccffff"=>"anime",
    );

    /** @var int Area code (one of the AREA_* constants). */
    public $area;
    /** @var array[] Programme records currently held in memory. */
    public $data;
    /** @var string[] Cache of station names, keyed by the site's packNo. */
    public $packList;

    /**
     * @param int $area Area code; the cache file for this area is loaded immediately.
     */
    public function __construct($area){
        $this->area=$area;
        $this->packList=array();
        $this->fLoad();
    }

    /**
     * Returns every programme for which the callback returns exactly true.
     *
     * @param callable $call Receives one programme array.
     * @return array[] Matching programmes, in the order of $this->data.
     */
    public function search($call){
        $result=array();
        foreach($this->data as $program){
            if($call($program)===true){
                $result[]=$program;
            }
        }
        return $result;
    }

    /**
     * Downloads the listings for today and the following seven days, merges
     * them into the cache, sorts the result (newest "from" first, then chId
     * descending) and writes the cache file.
     *
     * @param bool $saveBack true keeps the programmes already cached,
     *                       anything else starts from an empty list.
     */
    public function update($saveBack=true){
        $today=time();
        $newData=array();
        $fetched=0;
        for($day=0;$day<=7;$day++){
            $urlDate=date("Ymd",$today+($day*60*60*24));
            $url="http://www.tvguide.or.jp/TF1101LS.php?regionId={$this->area}&mediaId=1&stationId=&date={$urlDate}&dispflg=0&page=1&time=24";
            $souce=file_get_contents($url);
            if($souce===false){
                // This day could not be downloaded; keep going with the others.
                continue;
            }
            $fetched++;
            $this->analyzeSouce($souce,$newData);
        }
        if($fetched===0){
            // Nothing was downloaded at all: leave memory and the cache file
            // untouched instead of overwriting them (update(false) would
            // otherwise erase everything on a network failure).
            return;
        }
        $oldData=($saveBack===true)?$this->data:array();
        $this->margeProgram($oldData,$newData);
        usort($oldData,function($a,$b){
            // Descending by start time, ties broken by descending chId.
            // Equal elements must compare as 0 for a consistent ordering.
            if($a["from"]!=$b["from"]){
                return ($a["from"]>$b["from"])?-1:1;
            }
            if($a["chId"]!=$b["chId"]){
                return ($a["chId"]>$b["chId"])?-1:1;
            }
            return 0;
        });
        $this->data=$oldData;
        $this->fSave();
    }

    /**
     * Merges $new into $old in place.
     *
     * An old entry is considered the same programme as a new one when it is on
     * the same channel and either occupies exactly the same time slot, or has
     * the same programID and starts within +-24 hours. The first such old
     * entry is replaced by the new record and any further ones are removed, so
     * one new programme never ends up in the list more than once. New
     * programmes without a counterpart are appended. $old is re-indexed.
     *
     * @param array[] $old Existing programmes (modified in place).
     * @param array[] $new Freshly parsed programmes.
     */
    public function margeProgram(&$old,$new){
        foreach($new as $v){
            $ch=$v["chId"];
            $pid=$v["programID"];
            $timeA=$v["from"]+(60*60*24);
            $timeB=$v["from"]-(60*60*24);
            $replaced=false;
            $staleKeys=array();
            foreach($old as $x=>$w){
                $sameSlot=($ch==$w["chId"] && $w["from"]==$v["from"] && $w["to"]==$v["to"]);
                $sameProgram=($pid==$w["programID"] && $ch==$w["chId"] && $w["from"]<=$timeA && $w["from"]>=$timeB);
                if(!$sameSlot && !$sameProgram){
                    continue;
                }
                if($replaced===false){
                    $old[$x]=$v;
                    $replaced=true;
                }else{
                    // A second match would become a duplicate of $v.
                    $staleKeys[]=$x;
                }
            }
            // Removal is deferred so the array is not shrunk while iterating.
            foreach($staleKeys as $x){
                unset($old[$x]);
            }
            if($replaced===false){
                $old[]=$v;
            }
        }
        $old=array_values($old);
    }

    /**
     * Parses one listings page and appends the programmes found to $addData.
     *
     * Table cells that lack the description block or any of the mandatory
     * fields (length, title, date, programme id, station id, start time) are
     * skipped. The station name is resolved through pack2station(), which may
     * perform an HTTP request when the name is not cached yet.
     *
     * @param string     $souce   HTML of a listings page.
     * @param array|null $addData List to append to (by reference); a new list
     *                            is created when null.
     * @return array[] The same list as $addData, for callers that did not pass one.
     */
    public function analyzeSouce($souce,&$addData=null){
        if($addData===null){
            $addData=array();
        }
        if(!is_string($souce) || $souce===""){
            return $addData;
        }
        $tomorrow=array();   // per station: true once the clock has wrapped past midnight
        $beforeTime=array(); // per station: time of day (seconds) of the previous programme
        $souce=preg_replace('{\r|\n|\t}',"",$souce);
        if(!is_string($souce) || !preg_match_all('{<td rowspan=.+?</table></td>}',$souce,$cells,PREG_SET_ORDER)){
            return $addData;
        }
        foreach($cells as $cell){
            $souce1=$cell[0];
            // Cells without a description block are not treated as programmes.
            if(!preg_match('{<div class="program_text_3">(.*?)</div>}i',$souce1,$mInfo)){continue;}
            // Every field below is mandatory; skip the cell rather than store
            // a record with missing values.
            if(
                !preg_match('{rowspan="(\d+)"}',$souce1,$mLen) ||
                !preg_match('{<A.+?>(.+?)</A>}i',$souce1,$mTitle) ||
                !preg_match('{broadcastingDay=(\d+)-(\d+)-(\d+)}i',$souce1,$mDate) ||
                !preg_match('{programId=(\d+)}i',$souce1,$mPid) ||
                !preg_match('{packNo=(\d+)}i',$souce1,$mPack) ||
                !preg_match('{<div class="program_text_1">((\d+):(\d+))</div>}i',$souce1,$mStart)
            ){continue;}

            // rowspan * 60 is used as the length in seconds.
            $programLen=((int)$mLen[1])*60;
            $programDate=mktime(0,0,0,(int)$mDate[2],(int)$mDate[3],(int)$mDate[1]);
            $programStationId=$mPack[1];
            $programStationName=$this->pack2station($programStationId);

            // Category comes from the cell colour; without a bgcolor attribute
            // it stays null.
            $programCategory=null;
            if(preg_match('{bgcolor="(#[0-9a-f]*)"}i',$souce1,$mColor)){
                $programCategory=$this->rgb2cat($mColor[1]);
            }

            // Start time of day in seconds. When it goes backwards compared
            // with the previous programme of the same station, the listing has
            // crossed midnight and everything from here on is on the next day.
            $programStart=((int)$mStart[2])*60*60+((int)$mStart[3])*60;
            if(isset($beforeTime[$programStationId]) && $programStart<$beforeTime[$programStationId]){
                $tomorrow[$programStationId]=true;
            }else if(!isset($beforeTime[$programStationId])){
                $tomorrow[$programStationId]=false;
            }
            $beforeTime[$programStationId]=$programStart;
            if($tomorrow[$programStationId]===true){
                $programStart+=(60*60*24);
            }
            $programStart+=$programDate;

            $addData[]=array(
                "chName"=>$programStationName,
                "chId"=>$programStationId,
                "from"=>$programStart,
                "to"=>$programStart+$programLen,
                "title"=>$mTitle[1],
                "info"=>$mInfo[1],
                "category"=>$programCategory,
                "programID"=>$mPid[1],
                "flag"=>$this->img2flag($souce1),
            );
        }
        return $addData;
    }

    /**
     * Writes $this->data to the cache file, one JSON object per line, with
     * non-ASCII characters and slashes left unescaped and the HTML entities
     * &gt; &lt; &apos; &amp; replaced by their characters.
     * Records that cannot be JSON-encoded are skipped.
     */
    public function fSave(){
        $str="";
        foreach($this->data as $v){
            $vl=json_encode($v,JSON_UNESCAPED_UNICODE|JSON_UNESCAPED_SLASHES);
            if($vl===false){
                continue;
            }
            $str.=$this->decodeEntities($vl)."\n";
        }
        file_put_contents($this->storagePath(),$str);
    }

    /**
     * Makes a JSON string human readable: \uXXXX escapes become UTF-8
     * characters, "\/" becomes "/", and the entities &gt; &lt; &apos; &amp;
     * are replaced by their characters.
     *
     * Surrogate pairs are decoded as one character. Escapes whose raw form
     * would break the JSON (control characters, the double quote, the
     * backslash, lone surrogates) are left as they are, and an escaped
     * backslash that happens to be followed by "u" or "/" is not touched.
     *
     * @param string $str JSON text.
     * @return string
     */
    public function json_jap($str){
        $str=preg_replace_callback(
            '{\\\\\\\\|\\\\/|\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})}i',
            function($m){
                if($m[0]==='\\\\'){
                    // Escaped backslash: consume it so what follows is not misread.
                    return $m[0];
                }
                if($m[0]==='\\/'){
                    return '/';
                }
                if(isset($m[2]) && $m[2]!==''){
                    // High + low surrogate: one character outside the BMP.
                    return mb_convert_encoding(hex2bin($m[1].$m[2]),'UTF-8','UTF-16BE');
                }
                $code=hexdec($m[3]);
                if($code<0x20 || $code===0x22 || $code===0x5c || ($code>=0xd800 && $code<=0xdfff)){
                    return $m[0];
                }
                return mb_convert_encoding(hex2bin($m[3]),'UTF-8','UTF-16BE');
            },
            $str
        );
        return $this->decodeEntities($str);
    }

    /**
     * Loads the cache file of the current area into $this->data.
     * A missing file yields an empty list; lines that do not decode to a JSON
     * object/array are ignored.
     */
    public function fLoad(){
        $data=array();
        $path=$this->storagePath();
        if(!is_file($path)){
            // Earlier versions always joined the path with a backslash; on
            // systems where that is not the separator the cache may still
            // exist under that literal name.
            $legacy=__DIR__."\\internetTVGuide_{$this->area}.json";
            if(is_file($legacy)){
                $path=$legacy;
            }
        }
        if(is_file($path)){
            $lines=file($path,FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
            if($lines!==false){
                foreach($lines as $line){
                    $record=json_decode($line,true);
                    if(is_array($record)){
                        $data[]=$record;
                    }
                }
            }
        }
        $this->data=$data;
    }

    /**
     * Collects the flag names for all known icon images referenced in a cell.
     *
     * @param string $souce HTML of one programme cell.
     * @return string[] Flag names in order of appearance; unknown icons are ignored.
     */
    public function img2flag($souce){
        $ret=array();
        preg_match_all('{"http://www.tvguide.or.jp/image/icon/(\d+)\.gif"}',$souce,$m);
        foreach($m[1] as $v){
            $icon=(int)$v;
            if(isset(self::$iconFlags[$icon])){
                $ret[]=self::$iconFlags[$icon];
            }
        }
        return $ret;
    }

    /**
     * Maps a cell background colour to a category name.
     *
     * @param string $rgb Colour such as "#ffffcc" (letter case is ignored).
     * @return mixed Category name, or $rgb unchanged when the colour is unknown.
     */
    public function rgb2cat($rgb){
        if(!is_string($rgb)){
            return $rgb;
        }
        $key=strtolower($rgb);
        return isset(self::$colorCategories[$key])?self::$colorCategories[$key]:$rgb;
    }

    /**
     * Resolves a station id (packNo) to the station name shown on the site's
     * programme detail page. Results, including failures, are cached in
     * $this->packList so each id is requested at most once per instance.
     *
     * @param string|int $id packNo of the station.
     * @return string Station name, or "" when it could not be determined.
     */
    public function pack2station($id){
        if(isset($this->packList[$id])){
            return $this->packList[$id];
        }
        $urlDay=date("Y-m-d");
        $url="http://www.tvguide.or.jp/TF0001SY.php?programId=1&broadcastingDay={$urlDay}&packNo={$id}";
        $name="";
        $souce=file_get_contents($url);
        if($souce!==false){
            $souce=preg_replace('{\r|\n|\t}',"",$souce);
            // NOTE(review): the previous pattern ended in a lazy "((.+?))",
            // which can only ever capture a single byte. This assumes the
            // heading is "<date/time>(<station name>)" with either ASCII or
            // full-width parentheses - confirm against the live page.
            if(is_string($souce) && preg_match('{<div class="content_head_text_2" id="explanation_object_date">(?:[0-9/ ~:]|～|〜)+(?:\(|（)(.+?)(?:\)|）)}',$souce,$m)){
                $name=preg_replace('{〜}',"~",$m[1]);
            }
        }
        $this->packList[$id]=$name;
        return $name;
    }

    /** Path of the cache file for the current area, next to this source file. */
    private function storagePath(){
        return __DIR__.DIRECTORY_SEPARATOR."internetTVGuide_{$this->area}.json";
    }

    /**
     * Replaces &gt; &lt; &apos; &amp; by their characters. &amp; goes last so
     * that "&amp;gt;" is decoded only once. None of the replacements can
     * introduce a quote or backslash, so JSON text stays valid.
     */
    private function decodeEntities($str){
        return str_replace(['&gt;','&lt;','&apos;','&amp;'],['>','<','\'','&'],$str);
    }
}
/*
保存ファイルは県ごとに作成する
\internetTVGuide_23.json
{chName:NHK総合,chId:4142,from:1234567890,to:1234567890,title:番組名,info:番組内容,category:none,flag:[news,weather],programID:65536}
http://www.tvguide.or.jp//image/support/program_icon.gif
news weather new final
repeat stereo bilingual hand
mSound mString live bMode
digital wide mono free
2way ppv hv data
multi surround clear 1ch
2ch 3ch cc oc
dub
http://www.tvguide.or.jp//image/support/program_color.gif
drama movie music sport cook anime
番組情報を取得するのが
search(callback bool)
番組情報を更新するのが
update(過去の番組情報も残すか=true)
ここでupdate(false)とすると過去の情報も一気に削除して保存する
更新時、±24時間の内にprogramIDが重複する番組があれば、それを消す
dataプロパティのフォーマット
jsonそのまま。
fromの順番でソート。0が最新、と言うか一週間後
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment