Skip to content

Instantly share code, notes, and snippets.

@ronnywang
Created July 3, 2023 14:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ronnywang/07f597003678982a4a158f07ea6d7533 to your computer and use it in GitHub Desktop.
Save ronnywang/07f597003678982a4a158f07ea6d7533 to your computer and use it in GitHub Desktop.
<?php
// Live-stream transcription loop built on ffmpeg and whisper.cpp.
//
// A separate pipeline is expected to be writing 5-second WAV segments named
// output_<unix timestamp>.wav into the working directory, for example:
//
// streamlink 'https://www.youtube.com/watch?v=oIgbl7t0S_w' 240p -O | ffmpeg -i /dev/stdin -vn -acodec pcm_s16le -ar 16000 -ac 2 -f segment -segment_time 5 -reset_timestamps 1 -strftime 1 output_%s.wav
//
// Each pass of the loop below:
//   1. collects the segments (all but the newest), concatenates them with ffmpeg,
//   2. reads the combined duration with ffprobe,
//   3. runs whisper.cpp on the combined file, starting at the computed offset,
//   4. prints one JSON line per recognised message except the last one
//      (the last one is held back and re-transcribed on the next pass),
//   5. deletes every processed segment except the last one and remembers how
//      far before the end of the audio the held-back message started.
//
// Output (stdout): newline-delimited JSON. Commands are logged via error_log().

date_default_timezone_set('Asia/Taipei');

// escapeshellarg() drops multibyte characters when LC_CTYPE is the "C" locale
// (the startup default since PHP 8.0), which would silently strip the Chinese
// prompt passed to whisper. Ask for a UTF-8 ctype locale; if none of these is
// installed setlocale() returns false and the locale is left unchanged.
setlocale(LC_CTYPE, 'C.UTF-8', 'en_US.UTF-8', 'zh_TW.UTF-8');

// Length of one segment in seconds; must match -segment_time in the pipeline above.
$chunk_size = 5;
// Rolling tail (at most 100 characters) of the text already emitted, fed to whisper as prompt.
$message_pool = '';
// Seconds between the start of the held-back message and the end of the audio
// processed on the previous pass. Equal to $chunk_size means "start at 0".
$remain = $chunk_size;

while (true) {
    // Only consider segments that really carry a numeric timestamp; glob()
    // returns false on error, which sort() would reject.
    $files = preg_grep('#^output_\d+\.wav$#', glob('output_*.wav') ?: []);
    sort($files);
    // Leave the newest segment alone (presumably still being written by ffmpeg).
    array_pop($files);
    if (!count($files)) {
        sleep(1);
        continue;
    }

    // Unix timestamp of the first segment = wall-clock start of the combined audio.
    preg_match('#output_(\d+)#', $files[0], $matches);
    $t = $matches[1];

    // Build the ffmpeg concat list.
    $list = '';
    foreach ($files as $f) {
        $list .= sprintf("file '%s'\n", $f);
    }
    if (file_put_contents('list.txt', $list) === false) {
        error_log('cannot write list.txt');
        sleep(1);
        continue;
    }

    // NOTE(review): combined_*.wav files are never removed afterwards and
    // accumulate on disk — confirm whether they are kept on purpose for debugging.
    $combined_file = "combined_{$t}.wav";
    if (file_exists($combined_file)) {
        // ffmpeg would otherwise stop and ask before overwriting.
        unlink($combined_file);
    }
    $cmd = sprintf('ffmpeg -f concat -i list.txt -acodec copy %s', escapeshellarg($combined_file));
    error_log($cmd);
    system($cmd, $status);
    if ($status !== 0) {
        error_log("ffmpeg exited with status {$status}");
    }

    $cmd = sprintf('ffprobe -i %s -show_entries format=duration', escapeshellarg($combined_file));
    $ret = (string) shell_exec($cmd);
    $duration = preg_match('#duration=(.*)#', $ret, $matches) ? trim($matches[1]) : '';
    if (!is_numeric($duration)) {
        // Concatenation or probing failed. Drop this batch explicitly instead
        // of running into arithmetic on a non-numeric string further down.
        error_log("cannot determine duration of {$combined_file}; dropping " . count($files) . ' segment(s)');
        foreach ($files as $f) {
            unlink($f);
        }
        $remain = $chunk_size;
        continue;
    }

    // Position inside the first (kept) segment where the held-back message
    // started. If that message began before the kept segment the raw value is
    // negative; the earlier audio is already deleted, so start from 0.
    $offset = max($chunk_size - $remain, 0);
    echo json_encode([
        't' => $t,
        't_s' => date('YmdHis', (int) $t),
        'files' => implode(',', $files),
        'duration' => $duration,
        // NOTE(review): always reports 0 rather than $remain — confirm intent.
        'remain' => 0,
        'offset' => $offset,
    ]) . "\n";

    // A long backlog means we are falling behind; use the smaller model then.
    $model = $duration > 40 ? 'small' : 'medium';

    $cmd = sprintf(
        './whisper.cpp/main --prompt %s -m ./whisper.cpp/models/ggml-%s.bin -l zh --offset-t %d %s',
        escapeshellarg("前面的內容:" . $message_pool),
        $model,
        $offset * 1000,
        escapeshellarg($combined_file)
    );
    error_log($cmd);
    $fp = popen($cmd, 'r');
    if ($fp === false) {
        error_log('cannot start whisper.cpp');
        sleep(1);
        continue;
    }

    // A message is only emitted once the following one has been seen, so the
    // final message of every pass stays in $prev_message un-emitted.
    $prev_message = null;
    while (($line = fgets($fp)) !== false) {
        $line = trim($line);
        // Expected shape: [00:00:02.400 --> 00:00:05.000] text
        // The timestamp groups stop at the first "]" so that text containing
        // "] " is not swallowed into the end timestamp. Anything else is skipped.
        if ($line === '' || !preg_match('#\[([^\]]*?) --> ([^\]]*)\] (.*)#', $line, $matches)) {
            continue;
        }
        $parts = array_map('floatval', explode(':', $matches[1]));
        if (count($parts) !== 3) {
            continue;
        }
        // Start of this message in seconds from the start of the combined file.
        $start = $parts[0] * 3600 + $parts[1] * 60 + $parts[2];
        $message = $matches[3];

        if (!is_null($prev_message)) {
            $message_pool .= $prev_message['message'];
            if (mb_strlen($message_pool) > 100) {
                $message_pool = mb_substr($message_pool, -100);
            }
            echo json_encode([
                'time' => date('Y-m-d H:i:s', (int) floor($t + $prev_message['offset'])),
                'message' => $prev_message['message'],
                'offset' => $start,
                // NOTE(review): key has a trailing space; kept as-is because
                // consumers of this output may rely on it — confirm.
                'model ' => $model,
            ], JSON_UNESCAPED_UNICODE) . "\n";
        }
        $prev_message = ['offset' => $start, 'message' => $message];
    }
    pclose($fp);

    if (is_null($prev_message)) {
        // Nothing recognised: every segment is discarded, so no audio is
        // carried over and the next pass must start at offset 0 again.
        foreach ($files as $f) {
            unlink($f);
        }
        $remain = $chunk_size;
        continue;
    }

    // Keep the last processed segment for the next pass; delete the rest.
    array_pop($files);
    foreach ($files as $f) {
        unlink($f);
    }
    $remain = $duration - $prev_message['offset'];
    echo json_encode([
        'combined_file' => $combined_file,
        'prev_message' => $prev_message,
        'duration' => $duration,
    ], JSON_UNESCAPED_UNICODE) . "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment