Skip to content

Instantly share code, notes, and snippets.

@ajslaghu
Last active December 26, 2015 19:19
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajslaghu/7200963 to your computer and use it in GitHub Desktop.
Save ajslaghu/7200963 to your computer and use it in GitHub Desktop.
subtitle parser for php
<?php
// written by AJ Slaghuis 2013
//
// Reads every WebVTT subtitle file (*.txt) found in ./subtitles, parses its
// cues and PUTs them as one JSON document per file to
// http://localhost:9200/kro/subs/<file name without extension>.
//
// Expected file layout (LF or CRLF line endings, optional UTF-8 BOM):
//   WEBVTT
//   <blank line>
//   <cue number>
//   <interval>
//   <text>
//   <blank line>          (may be missing after the very last cue)
//   ...
//
// A malformed file aborts the whole run; die() prints one of these codes:
//   -1  first line is not the WEBVTT marker
//   -2  second line is not blank
//   -3  file ends right after a cue number
//   -4  file ends right after an interval
//   -5  the line after a cue text is not blank
//   -7  file ends inside the two-line header
define('DS', DIRECTORY_SEPARATOR);

/**
 * Reads one line from an open file without its line terminator.
 *
 * No length limit is passed to fgets(), so a long line is never split into
 * two reads (which would shift the cue structure).
 *
 * @param resource $handle open, readable file handle
 * @return string|false the line with trailing "\r" / "\n" removed, or false at end of file
 */
function readSubtitleLine($handle)
{
    $line = fgets($handle);

    return $line === false ? false : rtrim($line, "\r\n");
}

/**
 * Parses the cues of a WebVTT file that follows the layout described at the
 * top of this script. Terminates the script via die() with a numeric code
 * when the layout is violated.
 *
 * @param resource $handle open, readable file handle positioned at the start of the file
 * @return array list of array('num' => string, 'interval' => string, 'text' => string);
 *               'text' always ends with a single "\n"
 */
function parseSubtitleCues($handle)
{
    $marker = readSubtitleLine($handle);
    if ($marker === false) {
        die('-7');
    }
    // Tolerate a UTF-8 byte order mark in front of the marker.
    if (strncmp($marker, "\xEF\xBB\xBF", 3) === 0) {
        $marker = substr($marker, 3);
    }
    if ($marker !== 'WEBVTT') {
        die('-1');
    }

    $blank = readSubtitleLine($handle);
    if ($blank === false) {
        die('-7');
    }
    if ($blank !== '') {
        die('-2');
    }

    $cues = array();
    while (($num = readSubtitleLine($handle)) !== false) { // false: EOF reached
        $interval = readSubtitleLine($handle);
        if ($interval === false) {
            die('-3');
        }

        $text = readSubtitleLine($handle);
        if ($text === false) {
            die('-4');
        }

        // End of file directly after the text is accepted: the last cue of a
        // file does not have to be followed by a blank line.
        $separator = readSubtitleLine($handle);
        if ($separator !== false && $separator !== '') {
            die('-5');
        }

        // The text keeps a trailing "\n" so documents look the same whether
        // the source file used LF or CRLF line endings.
        $cues[] = array('num' => $num, 'interval' => $interval, 'text' => $text . "\n");
    }

    return $cues;
}

$basepath = './subtitles';
$entries = scandir($basepath); //sorted alphabetically
if ($entries === false) {
    die("cannot read directory $basepath\n");
}

// One cURL handle for the whole run, so the connection to the server can be
// reused between files and no handle is left behind per file.
$ci = curl_init();
if ($ci === false) {
    die("cannot initialise curl\n");
}
curl_setopt($ci, CURLOPT_TIMEOUT, 200);
curl_setopt($ci, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ci, CURLOPT_FORBID_REUSE, 0);
curl_setopt($ci, CURLOPT_CUSTOMREQUEST, 'PUT');
// Elasticsearch 6.0 and later reject request bodies without an explicit content type.
curl_setopt($ci, CURLOPT_HTTPHEADER, array('Content-Type: application/json'));

foreach ($entries as $entry) {
    $path = $basepath . DS . $entry;

    // is_dir() needs the full path; the bare entry name would be resolved
    // against the current working directory instead of $basepath.
    if ($entry === '.' || $entry === '..' || is_dir($path) || substr($entry, -4) !== '.txt') {
        continue;
    }

    $prid = substr($entry, 0, -4);

    $handle = fopen($path, 'r');
    if (!$handle) {
        die("$entry no handle\n");
    }
    $result = parseSubtitleCues($handle);
    fclose($handle); // the file is no longer needed while talking to the server

    // Cast to object so the document is a JSON object keyed "0", "1", ...
    // even when the file contains no cues.
    $json = json_encode((object) $result);
    if ($json === false) {
        die("$entry cannot be encoded as JSON (error " . json_last_error() . ")\n");
    }

    // now curl it into elastic search with a prid as id
    curl_setopt($ci, CURLOPT_URL, 'http://localhost:9200/kro/subs/' . rawurlencode($prid));
    curl_setopt($ci, CURLOPT_POSTFIELDS, $json);
    $response = curl_exec($ci);
    if ($response === false) {
        // Report the failure but keep going with the remaining files.
        echo "$entry not indexed: " . curl_error($ci) . "\n";
        continue;
    }
    print_r($response);
}
?>
@ajslaghu
Copy link
Author

Test your Elasticsearch instance with:
curl -s -XGET 'localhost:9200/kro/subs/_search?pretty&q=hilversum'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment