Created
April 7, 2012 01:44
-
-
Save fushihara/2324451 to your computer and use it in GitHub Desktop.
internetTVGuideを解析
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Scraper and local cache for the tvguide.or.jp ("Internet TV Guide") listings of one area.
 *
 * A programme record is a plain associative array with the keys
 * chName, chId, from, to, title, info, category, programID and flag.
 * Records are persisted one JSON object per line in
 * internetTVGuide_<area>.json, stored next to this source file.
 */
class internetTVGuide
{
    // Area codes (used as the regionId query parameter of the listings page).
    const AREA_HOKKAIDO = 1;
    const AREA_AOMORI = 2;
    const AREA_IWATE = 3;
    const AREA_MIYAGI = 4;
    const AREA_AKITA = 5;
    const AREA_YAMAGATA = 6;
    const AREA_FUKUSHIMA = 7;
    const AREA_IBARAKI = 8;
    const AREA_TOCHIGI = 9;
    const AREA_GUNMA = 10;
    const AREA_SAITAMA = 11;
    const AREA_CHIBA = 12;
    const AREA_TOKYO = 13;
    const AREA_KANAGAWA = 14;
    const AREA_NIIGATA = 15;
    const AREA_TOYAMA = 16;
    const AREA_ISHIKAWA = 17;
    const AREA_FUKUI = 18;
    const AREA_YAMANASHI = 19;
    const AREA_NAGANO = 20;
    const AREA_GIFU = 21;
    const AREA_SHIZUOKA = 22;
    const AREA_AICHI = 23;
    const AREA_MIE = 24;
    const AREA_SHIGA = 25;
    const AREA_KYOTO = 26;
    const AREA_OSAKA = 27;
    const AREA_HYOGO = 28;
    const AREA_NARA = 29;
    const AREA_WAKAYAMA = 30;
    const AREA_TOTTORI = 31;
    const AREA_SHIMANE = 32;
    const AREA_OKAYAMA = 33;
    const AREA_HIROSHIMA = 34;
    const AREA_YAMAGUCHI = 35;
    const AREA_TOKUSHIMA = 36;
    const AREA_KAGAWA = 37;
    const AREA_EHIME = 38;
    const AREA_KOCHI = 39;
    const AREA_FUKUOKA = 40;
    const AREA_SAGA = 41;
    const AREA_NAGASAKI = 42;
    const AREA_KUMAMOTO = 43;
    const AREA_OITA = 44;
    const AREA_MIYAZAKI = 45;
    const AREA_KAGOSHIMA = 46;
    const AREA_OKINAWA = 47;

    /** Icon number (from .../image/icon/<n>.gif) => flag name. */
    private static $flagNames = [
        "27" => "news",
        "45" => "data",
        "29" => "repeat",
        "12" => "bilingual",
        "11" => "mSound",
        "17" => "mString",
        "26" => "new",
        "18" => "hand",
        "30" => "final",
        "10" => "stereo",
        "13" => "bMode",
        "20" => "free",
        "21" => "mono",
        "28" => "weather",
        "41" => "digital",
        "42" => "1ch",
        "43" => "2ch",
        "44" => "3ch",
        "46" => "2way",
        "47" => "multi",
        "48" => "surround",
        "49" => "live",
        "50" => "cc",
        "51" => "oc",
        "52" => "dub",
        "53" => "ppv",
        "54" => "ppv",
    ];

    /** Lower-case cell background colour => category name. */
    private static $categoryNames = [
        "#" => "none",
        "#ffffcc" => "drama",
        "#ffcccc" => "movie",
        "#ccffcc" => "music",
        "#ccccff" => "sport",
        "#ffcc99" => "cook",
        "#ccffff" => "anime",
    ];

    // Properties
    /** Area code this instance works on (one of the AREA_* constants). */
    public $area;
    /** List of programme records; update() leaves it sorted by start time, latest first. */
    public $data;
    /** Per-instance cache of station names keyed by pack number (see pack2station()). */
    public $packList;

    /**
     * Binds the instance to an area and loads that area's saved listings, if any.
     *
     * @param int $area One of the AREA_* constants.
     */
    public function __construct($area)
    {
        $this->area = $area;
        $this->packList = [];
        $this->fLoad();
    }

    /**
     * Returns every loaded programme for which the callback returns exactly true.
     *
     * @param callable $call Receives one programme record; truthy-but-not-true results are ignored.
     * @return array List of matching programme records, in $this->data order.
     */
    public function search($call)
    {
        $result = [];
        foreach ($this->data as $v) {
            if ($call($v) === true) {
                $result[] = $v;
            }
        }
        return $result;
    }

    /**
     * Downloads the listings for today and the following seven days, merges them
     * into the stored data, sorts the result and writes it back to disk.
     *
     * @param bool $saveBack true keeps previously stored programmes; anything else
     *                       discards them and stores only the freshly fetched ones.
     */
    public function update($saveBack = true)
    {
        // Fetch the new listings, one page per day.
        $today = time();
        $newData = [];
        for ($day = 0; $day <= 7; $day++) {
            $urlDate = date("Ymd", $today + ($day * 60 * 60 * 24));
            $url = "http://www.tvguide.or.jp/TF1101LS.php?regionId={$this->area}&mediaId=1&stationId=&date={$urlDate}&dispflg=0&page=1&time=24";
            $souce = file_get_contents($url);
            if ($souce === false) {
                // A failed download contributes nothing; keep going with the other days.
                continue;
            }
            $this->analyzeSouce($souce, $newData);
        }
        $oldData = ($saveBack === true) ? $this->data : [];
        $this->margeProgram($oldData, $newData);
        // Sort by "from" descending; ties are broken by "chId" descending.
        usort($oldData, function ($a, $b) {
            if ($a["from"] > $b["from"]) {
                return -1;
            }
            if ($a["from"] < $b["from"]) {
                return 1;
            }
            if ($a["chId"] > $b["chId"]) {
                return -1;
            }
            if ($a["chId"] < $b["chId"]) {
                return 1;
            }
            // Equal keys must compare as 0 so the comparator stays consistent.
            return 0;
        });
        $this->data = $oldData;
        $this->fSave();
    }

    /**
     * Merges freshly fetched programmes into an existing list, in place.
     *
     * A new programme replaces an old one on the same channel when either
     * both have identical from/to times, or both share the programID and the
     * old one starts within +-24 hours of the new one. If several old entries
     * match, the first is replaced and the others are dropped, so a single
     * new programme never ends up in the list more than once. Programmes
     * without any match are appended.
     *
     * @param array $old List of programme records, modified by reference. It is
     *                   re-indexed only when entries had to be dropped.
     * @param array $new List of programme records to merge in.
     */
    public function margeProgram(&$old, $new)
    {
        $removed = false;
        foreach ($new as $v) {
            $ch = $v["chId"];
            $pid = $v["programID"];
            $timeA = $v["from"] + (60 * 60 * 24);
            $timeB = $v["from"] - (60 * 60 * 24);
            $replaced = false;
            // Iterate over a snapshot of the keys so entries can be unset safely.
            foreach (array_keys($old) as $x) {
                $w = $old[$x];
                if ($ch != $w["chId"]) {
                    continue;
                }
                // Same channel, same time slot.
                $sameSlot = ($w["from"] == $v["from"] && $w["to"] == $v["to"]);
                // Same channel, same programme ID within +-24 hours.
                $samePid = ($pid == $w["programID"] && $w["from"] <= $timeA && $w["from"] >= $timeB);
                if (!$sameSlot && !$samePid) {
                    continue;
                }
                if ($replaced === false) {
                    $old[$x] = $v;
                    $replaced = true;
                } else {
                    unset($old[$x]);
                    $removed = true;
                }
            }
            if ($replaced === false) {
                $old[] = $v;
            }
        }
        if ($removed) {
            $old = array_values($old);
        }
    }

    /**
     * Extracts programme records from one listings page and appends them to $addData.
     *
     * Table cells that lack any of the required pieces (description, rowspan,
     * title link, broadcastingDay, programId, packNo, start time) are skipped.
     * The station name is resolved through pack2station(), which may perform
     * an HTTP request for pack numbers that are not cached in $this->packList.
     *
     * @param string     $souce   HTML of a listings page.
     * @param array|null $addData List to append to (by reference); a new list is started when null.
     * @return array The resulting list (same content as $addData after the call).
     */
    public function analyzeSouce($souce, &$addData = null)
    {
        if ($addData === null) {
            $addData = [];
        }
        $tomorrow = [];   // per station: true once the listing has passed midnight
        $beforeTime = []; // per station: previous start time; a smaller value means the date rolled over
        $souce = preg_replace('{\r|\n|\t}', "", (string)$souce);
        preg_match_all('{<td rowspan=.+?</table></td>}', $souce, $m, PREG_SET_ORDER);
        foreach ($m as $v) {
            // One cell yields title, description, start hour/minute and station number.
            $souce1 = $v[0];
            if (!preg_match('{<div class="program_text_3">(.*?)</div>}i', $souce1, $mInfo)) {
                continue;
            }
            if (!preg_match('{rowspan="(\d+)"}', $souce1, $mLen)
                || !preg_match('{<A.+?>(.+?)</A>}i', $souce1, $mTitle)
                || !preg_match('{broadcastingDay=(\d+)-(\d+)-(\d+)}i', $souce1, $mDay)
                || !preg_match('{programId=(\d+)}i', $souce1, $mPid)
                || !preg_match('{packNo=(\d+)}i', $souce1, $mPack)
                || !preg_match('{<div class="program_text_1">((\d+):(\d+))</div>}i', $souce1, $mTime)
            ) {
                continue;
            }
            // The rowspan value is multiplied by 60 to get the duration in seconds.
            $programLen = (int)$mLen[1] * 60;
            $programTitle = $mTitle[1];
            $programInfo = $mInfo[1];
            $programDate = mktime(0, 0, 0, (int)$mDay[2], (int)$mDay[3], (int)$mDay[1]);
            $programId = $mPid[1];
            $programStationId = $mPack[1];
            $programStationName = $this->pack2station($programStationId);
            // Category: derived from the cell background colour; "none" when no bgcolor is present.
            $programCategory = "none";
            if (preg_match('{bgcolor="(#[0-9a-f]*)"}i', $souce1, $mColor)) {
                $programCategory = $this->rgb2cat($mColor[1]);
            }
            // Flags
            $programFlags = $this->img2flag($souce1);
            // Start time of day in seconds, plus detection of the roll-over to the next day.
            $programStart = (int)$mTime[2] * 60 * 60 + (int)$mTime[3] * 60;
            if (isset($beforeTime[$programStationId]) && $programStart < $beforeTime[$programStationId]) {
                // The time of day went backwards, so this station has crossed midnight.
                $tomorrow[$programStationId] = true;
            } elseif (!isset($beforeTime[$programStationId])) {
                $tomorrow[$programStationId] = false;
            }
            $beforeTime[$programStationId] = $programStart;
            if ($tomorrow[$programStationId] === true) {
                $programStart += (60 * 60 * 24);
            }
            $programStart += $programDate;
            $programEnd = $programStart + $programLen;
            // Everything needed is known; build the record.
            $addData[] = [
                "chName" => $programStationName,
                "chId" => $programStationId,
                "from" => $programStart,
                "to" => $programEnd,
                "title" => $programTitle,
                "info" => $programInfo,
                "category" => $programCategory,
                "programID" => $programId,
                "flag" => $programFlags,
            ];
        }
        return $addData;
    }

    /**
     * Writes $this->data to the area's save file, one JSON object per line.
     * Records that cannot be JSON-encoded are skipped.
     */
    public function fSave()
    {
        $str = "";
        foreach ($this->data as $v) {
            $vl = json_encode($v);
            if ($vl === false) {
                continue;
            }
            $str .= $this->json_jap($vl) . "\n";
        }
        file_put_contents($this->filePath(), $str);
    }

    /**
     * Makes JSON text human-readable: \uXXXX escapes become UTF-8 characters,
     * "\/" becomes "/", and a few HTML entities are decoded.
     *
     * Escapes that must stay escaped for the text to remain valid JSON are
     * left untouched: control characters, the double quote, the backslash and
     * UTF-16 surrogates. An escaped backslash followed by "u" or "/" is not
     * mistaken for an escape sequence.
     *
     * @param string $str JSON text.
     * @return string The rewritten JSON text.
     */
    public function json_jap($str)
    {
        // Consume every backslash escape as a unit so that "\\" never exposes a false "\u".
        $str = preg_replace_callback(
            '{\\\\(?:u([0-9a-fA-F]{4})|.)}s',
            function ($m) {
                if ($m[0] === '\\/') {
                    return '/';
                }
                if (!isset($m[1]) || $m[1] === '') {
                    return $m[0];
                }
                $code = hexdec($m[1]);
                if ($code < 0x20 || $code === 0x22 || $code === 0x5c || ($code >= 0xd800 && $code <= 0xdfff)) {
                    return $m[0];
                }
                $char = json_decode('"' . $m[0] . '"');
                return is_string($char) ? $char : $m[0];
            },
            $str
        );
        // NOTE(review): the entity list was reconstructed from a listing in which the
        // entities had been rendered as plain characters - confirm against the original.
        // "&amp;" is handled last so that e.g. "&amp;gt;" is decoded only once.
        $str = str_replace(['&gt;', '&lt;', '&#039;', '&amp;'], ['>', '<', '\'', '&'], $str);
        return $str;
    }

    /**
     * Loads the area's save file into $this->data. Lines that are not valid
     * JSON are ignored; a missing or unreadable file yields an empty list.
     */
    public function fLoad()
    {
        $data = [];
        $path = $this->filePath();
        if (is_file($path)) {
            $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
            if ($lines !== false) {
                foreach ($lines as $v) {
                    $nv = json_decode($v, true);
                    if ($nv !== null) {
                        $data[] = $nv;
                    }
                }
            }
        }
        $this->data = $data;
    }

    /**
     * Path of the save file for the current area, next to this source file.
     * Uses the platform's directory separator instead of a hard-coded backslash.
     */
    private function filePath()
    {
        return __DIR__ . DIRECTORY_SEPARATOR . "internetTVGuide_{$this->area}.json";
    }

    // Per-field helpers for the HTML analysis.

    /**
     * Maps the icon images referenced in a cell to flag names.
     *
     * @param string $souce HTML of one programme cell.
     * @return array Flag names in order of appearance; unknown icon numbers are ignored.
     */
    public function img2flag($souce)
    {
        $ret = [];
        preg_match_all('{"http://www\.tvguide\.or\.jp/image/icon/(\d+)\.gif"}', (string)$souce, $m);
        foreach ($m[1] as $v) {
            if (isset(self::$flagNames[$v])) {
                $ret[] = self::$flagNames[$v];
            }
        }
        return $ret;
    }

    /**
     * Maps a cell background colour to a category name.
     *
     * @param string $rgb Colour such as "#ffffcc"; letter case is ignored.
     * @return string Category name, or $rgb unchanged when the colour is unknown.
     */
    public function rgb2cat($rgb)
    {
        $key = strtolower((string)$rgb);
        if (isset(self::$categoryNames[$key])) {
            return self::$categoryNames[$key];
        }
        return $rgb;
    }

    /**
     * Resolves a pack number to a station name, caching the answer in $this->packList.
     *
     * On a cache miss the programme detail page is downloaded. When the download
     * fails or the expected markup is not found, an empty string is cached and returned.
     *
     * @param string|int $id Pack number (the packNo query parameter).
     * @return string Station name as extracted from the page.
     */
    public function pack2station($id)
    {
        if (isset($this->packList[$id])) {
            return $this->packList[$id];
        }
        $urlDay = date("Y-m-d");
        $url = "http://www.tvguide.or.jp/TF0001SY.php?programId=1&broadcastingDay={$urlDay}&packNo={$id}";
        $souce = file_get_contents($url);
        $name = "";
        if ($souce !== false) {
            $souce = preg_replace('{\r|\n|\t}', "", $souce);
            // NOTE(review): as written, the lazy "(.+?)" at the very end of the pattern
            // captures a single character. The pattern was presumably meant to be bounded
            // by delimiters around the station name - confirm against the page markup.
            if (preg_match('{<div class="content_head_text_2" id="explanation_object_date">[0-9/ ~:]+((.+?))}', $souce, $m)) {
                $name = preg_replace('{〜}', "~", $m[1]);
            }
        }
        $this->packList[$id] = $name;
        return $this->packList[$id];
    }
}
/*
One save file is created per prefecture:
\internetTVGuide_23.json
Each line of the file is one JSON object, for example:
{chName:NHK総合,chId:4142,from:1234567890,to:1234567890,title:<programme title>,info:<programme description>,category:none,flag:[news,weather],programID:65536}

Flag icons - http://www.tvguide.or.jp//image/support/program_icon.gif
news weather new final
repeat stereo bilingual hand
mSound mString live bMode
digital wide mono free
2way ppv hv data
multi surround crear 1ch
2ch 3ch cc oc
dub

Category colours - http://www.tvguide.or.jp//image/support/program_color.gif
drama movie music sport cook anime

To look up programme information:
search(callback returning bool)
To refresh programme information:
update(keep past programme information = true)
Calling update(false) discards all past information at once before saving.
During an update, if a programme with a duplicate programID exists within +-24 hours, it is removed (replaced by the newly fetched entry).

Format of the data property:
the decoded JSON records, as is.
Sorted by "from": index 0 is the latest entry, i.e. roughly one week ahead.
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment