Skip to content

Instantly share code, notes, and snippets.

@a-yasui
Created August 30, 2017 06:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save a-yasui/0abec4e455551e0ce9411d749794efee to your computer and use it in GitHub Desktop.
Save a-yasui/0abec4e455551e0ce9411d749794efee to your computer and use it in GitHub Desktop.
Video IndexerのJSONファイルをWebVTTに書き出すスクリプト
<?php
/**
 * Converts a Video Indexer JSON file into a WebVTT (.vtt) subtitle file.
 *
 * Usage: php ./convert.php -f <json file>
 */
/**
 * One transcript line: an identifier, a start/end time and the text.
 *
 * The time getters normalise the raw time strings handed to the constructor
 * so that they always carry exactly three fractional digits, which is the
 * form the WebVTT cue timing line is written with by Dumper.
 */
class Block
{
    /**
     * @var mixed Identifier of the line, stored as given.
     */
    protected $id;

    /**
     * @var string Raw start time, stored as given.
     */
    protected $start_time;

    /**
     * @var string Raw end time, stored as given.
     */
    protected $end_time;

    /**
     * @var string Text of the line.
     */
    protected $message;

    /**
     * Block constructor.
     *
     * @param mixed  $id         Identifier of the line.
     * @param string $start_time Raw start time, e.g. "00:00:01.2345678".
     * @param string $end_time   Raw end time in the same form.
     * @param string $message    Text of the line.
     */
    public function __construct($id, $start_time, $end_time, $message)
    {
        $this->id = $id;
        $this->start_time = $start_time;
        $this->end_time = $end_time;
        $this->message = $message;
    }

    /**
     * @return mixed The identifier passed to the constructor.
     */
    public function getId()
    {
        return $this->id;
    }

    /**
     * @return string Start time with exactly three fractional digits.
     */
    public function getStartTime()
    {
        return $this->formatTime($this->start_time);
    }

    /**
     * @return string End time with exactly three fractional digits.
     */
    public function getEndTime()
    {
        return $this->formatTime($this->end_time);
    }

    /**
     * @return string The text passed to the constructor.
     */
    public function getMessage()
    {
        return $this->message;
    }

    /**
     * Normalises a raw time string to three fractional digits.
     *
     * Longer fractions are truncated, shorter ones are right-padded with
     * zeros, and a missing fraction becomes ".000". A string that does not
     * start with "digits:digits:digits" is cut at its first "." and given
     * ".000", as before.
     *
     * @param string $time Raw time string.
     * @return string
     */
    private function formatTime($time)
    {
        $time = (string) $time;

        if (preg_match('/\A(\d+:\d+:\d+)(?:\.(\d*))?/', $time, $group)) {
            // The optional fraction group is absent from $group when the
            // input has no "." part at all.
            $fraction = isset($group[2]) ? $group[2] : '';

            return $group[1] . '.' . substr(str_pad($fraction, 3, '0'), 0, 3);
        }

        $parts = explode('.', $time);

        return $parts[0] . '.000';
    }
}
/**
 * Exception whose default message reports that a file could not be read as JSON.
 *
 * Within this file it is thrown from Parser::getLines().
 */
class FileIsUnValidJsonException extends \Exception
{
/**
 * Default exception message; in English: "The file could not be read as JSON."
 *
 * @var string
 */
protected $message = 'ファイルをJsonとして読み込めませんでした。';
}
/**
 * Reads a JSON file and extracts its transcript lines as Block objects.
 *
 * The lines are looked up under
 * breakdowns[] -> insights -> transcriptBlocks[] -> lines[].
 */
class Parser
{
    /**
     * @var string Path of the JSON file to read.
     */
    protected $file;

    /**
     * @var Block[] Lines found by the last successful getLines() call.
     */
    protected $lines;

    /**
     * Parser constructor.
     *
     * @param string $file Path of the JSON file to read.
     */
    public function __construct($file)
    {
        $this->file = $file;
        $this->lines = [];
    }

    /**
     * Loads the file and returns every transcript line found in it.
     *
     * A non-empty result is cached, so later calls return it without reading
     * the file again. A document without a "breakdowns" key yields an empty
     * array.
     *
     * @return Block[]
     * @throws FileIsUnValidJsonException When the file cannot be read or its
     *                                    content is not valid JSON.
     */
    public function getLines()
    {
        if (0 < count($this->lines)) {
            return $this->lines;
        }

        $path = (string) $this->file;
        $contents = is_readable($path) ? file_get_contents($path) : false;
        if ($contents === false) {
            throw new FileIsUnValidJsonException();
        }

        $raw_json = json_decode($contents, true);
        // json_decode() returns null both for the literal "null" and for a
        // decode failure, so the error state has to be checked explicitly.
        if ($raw_json === null && (trim($contents) === '' || json_last_error() !== JSON_ERROR_NONE)) {
            throw new FileIsUnValidJsonException();
        }

        $this->lines = is_array($raw_json) ? $this->collectLines($raw_json) : [];

        return $this->lines;
    }

    /**
     * Walks the decoded document and builds one Block per transcript line.
     *
     * Nodes that are missing or are not arrays are skipped. A line without a
     * start and end time is skipped as well, because no cue timing can be
     * produced for it. A missing id becomes null and a missing text becomes
     * an empty string.
     *
     * @param array $document Decoded JSON document.
     * @return Block[]
     */
    private function collectLines(array $document)
    {
        $result = [];

        if (!isset($document['breakdowns']) || !is_array($document['breakdowns'])) {
            return $result;
        }

        foreach ($document['breakdowns'] as $breakdown) {
            if (!isset($breakdown['insights']['transcriptBlocks'])
                || !is_array($breakdown['insights']['transcriptBlocks'])
            ) {
                continue;
            }

            foreach ($breakdown['insights']['transcriptBlocks'] as $block) {
                if (!isset($block['lines']) || !is_array($block['lines'])) {
                    continue;
                }

                foreach ($block['lines'] as $line) {
                    if (!isset($line['timeRange']['start'], $line['timeRange']['end'])) {
                        continue;
                    }

                    $result[] = new Block(
                        isset($line['id']) ? $line['id'] : null,
                        $line['timeRange']['start'],
                        $line['timeRange']['end'],
                        isset($line['text']) ? $line['text'] : ''
                    );
                }
            }
        }

        return $result;
    }
}
/**
 * Writes a list of transcript lines to a file in WebVTT format.
 */
class Dumper
{
    /**
     * @var Block[] Lines to write.
     */
    protected $data;

    /**
     * Dumper constructor.
     *
     * @param Block[] $data Lines to write; each item must provide
     *                      getMessage(), getStartTime() and getEndTime().
     */
    public function __construct($data)
    {
        $this->data = $data;
    }

    /**
     * Writes the WebVTT header followed by one cue per non-empty line.
     *
     * Any existing file at the given path is replaced. Lines whose message
     * is empty are left out, and cue identifiers are numbered from 0 over
     * the lines that are actually written.
     *
     * @param string $file_path Path of the file to write.
     * @throws \Exception When the file cannot be opened for writing.
     */
    public function output($file_path)
    {
        // 'w' truncates the target. The previous mode 'aw' was treated as
        // append, so a second run added another header and duplicate cues
        // to the same file.
        $fp = fopen($file_path, 'w');
        if ($fp === false) {
            throw new \Exception('ファイル「' . $file_path . '」に書き出すことができませんでした。');
        }

        fwrite($fp, "WEBVTT - This file has cues.\n\n");

        $counter = 0;
        foreach ($this->data as $data) {
            $message = (string) $data->getMessage();
            if ($message === '') {
                continue;
            }

            fwrite($fp, $counter . "\n");
            fwrite($fp, $data->getStartTime() . " --> " . $data->getEndTime() . "\n");
            fwrite($fp, $message . "\n\n\n");
            $counter += 1;
        }

        fclose($fp);
    }
}
// Command-line entry point: read the JSON file named by -f and write the
// transcript to "<input name>.vtt" in the current working directory.
$arg = getopt('f:');

// getopt() yields no 'f' entry when the option is absent and an array when
// it is repeated; neither can be used as a single input path.
if (!is_array($arg) || !isset($arg['f']) || !is_string($arg['f']) || $arg['f'] === '') {
    throw new \Exception('Option -f <json file> is required. Usage: php ./convert.php -f <json file>');
}

$read_file = $arg['f'];
if (file_exists($read_file) === false) {
    throw new \Exception('File:[' . $read_file . '] is not found.');
}

$file = new \SplFileInfo($read_file);

// Swap the input extension for ".vtt". An input without an extension simply
// gets ".vtt" appended, where it previously became "<name>vtt".
$extension = $file->getExtension();
$base_name = $extension === '' ? $file->getBasename() : $file->getBasename('.' . $extension);
$outputfile = $base_name . '.vtt';

echo "Load Data...\n";
$parser = new Parser($file->getRealPath());
$load_data = $parser->getLines();
echo "Load Finish.\n";

echo "Out put for [" . $outputfile . "]\n";
$dumper = new Dumper($load_data);
$dumper->output($outputfile);
echo "Finish\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment