@hiroshi-manabe
Forked from mizchi/SpeechRecognizer.tsx
Created March 3, 2019 16:11
SpeechRecognizer.tsx
import React, { useEffect, useState, useRef } from "react";
import {
  SpeechRecognitionResult,
  createSpeechRecognition,
  createMediaRecorder,
  SpeechRecord
} from "./utils";

let currentSpeechRecognition: any | null = null;
let currentMediaRecorder: any | null = null;

type Props = {
  onBlobUpdated: (blob: Blob, records: SpeechRecord[]) => void;
};
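
// `records` lives in module scope (not in useState) so the recognition
// callbacks below always see the current array without stale closures.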
let records: SpeechRecord[] = [];

export function SpeechRecognizer(props: Props) {
  const textareaRef = useRef<HTMLTextAreaElement>(null);
  const [isRecording, setRecording] = useState(false);
  const [step, setStep] = useState(0);
  const [startAt, setStartAt] = useState(0);
  const [withTimestamp, setWithTimestamp] = useState(true);
  // const [records, setRecords] = useState<SpeechRecord[]>([]);
  const [currentResults, setCurrentResults] = useState<
    SpeechRecognitionResult[]
  >([]);

  const onRecordStart = async () => {
    const mediaRecorder = await createMediaRecorder({
      onData(chunks) {
        const size = chunks.map(c => c.size).reduce((sum, i) => sum + i, 0);
        console.log(`size: ${Math.floor(size / 1000)}kb`);
      },
      onRecordEnd(blob) {
        props.onBlobUpdated(blob, records);
      }
    });
    currentMediaRecorder = mediaRecorder;
    mediaRecorder.start();
    records = [];
    setStartAt(Date.now());
    setRecording(true);
  };

  const onRecordEnd = async () => {
    if (currentSpeechRecognition) {
      currentSpeechRecognition.stop();
    }
    if (currentMediaRecorder) {
      currentMediaRecorder.stop();
    }
    setRecording(false);
    setStep(0);
  };
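
  // The Web Speech API ends a recognition session after each utterance
  // (continuous mode is not enabled), so onEnd below bumps `step`, which
  // re-runs this effect and starts a fresh session while recording is on.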
  useEffect(() => {
    if (!isRecording) {
      return;
    }
    let recognition = createSpeechRecognition({
      lang: "ja-JP",
      onResult(results) {
        setCurrentResults(results);
      },
      onEnd(results, range) {
        if (results.length > 0) {
          records.push({
            start: range.start - startAt,
            end: range.end - startAt,
            results: results.map(r => {
              // keep only plain serializable fields for JSON.stringify
              return {
                transcript: r.transcript,
                confidence: r.confidence
              };
            })
          });
        }
        setCurrentResults([]);
        setStep(s => s + 1);
      }
    });
    currentSpeechRecognition = recognition;
    console.log("start new recognition: step", step);
    recognition.start();
    return () => {
      // @ts-ignore
      recognition = null;
    };
  }, [isRecording, step]);

  const speechText = recordsToString(records, 0, withTimestamp);

  // scroll to bottom
  useEffect(() => {
    if (textareaRef.current) {
      textareaRef.current.scrollTop = textareaRef.current.scrollHeight;
    }
  }, [speechText]);

  return (
    <>
      {isRecording ? (
        <>
          <button onClick={onRecordEnd}>recording end</button>
          &nbsp; Recording...
        </>
      ) : (
        <button onClick={onRecordStart}>recording start</button>
      )}
      <hr />
      <h3>Output</h3>
      {currentResults.length > 0 && <div>Input...</div>}
      {currentResults.map((r, index) => {
        return (
          <div key={index}>
            {Math.floor(r.confidence * 10000) / 100}%: {r.transcript}
          </div>
        );
      })}
      <textarea
        ref={textareaRef}
        placeholder="Press start to record..."
        readOnly={isRecording}
        style={{ width: "50vw", height: "30vh" }}
        value={speechText}
        onChange={() => {}}
      />
      <div>
        {withTimestamp ? (
          <button
            onClick={() => {
              setWithTimestamp(false);
            }}
          >
            withTimestamp: on
          </button>
        ) : (
          <button
            onClick={() => {
              setWithTimestamp(true);
            }}
          >
            withTimestamp: off
          </button>
        )}
      </div>
    </>
  );
}

function recordsToString(
  records: SpeechRecord[],
  startAt: number,
  withTimestamp: boolean
) {
  return records
    .map(r => {
      const s = withTimestamp
        ? `${Math.floor((r.start - startAt) / 1000)}s: `
        : "";
      const t = r.results.map(i => i.transcript).join(". ");
      return `${s}${t}`;
    })
    .join("\n");
}
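
// --- Usage sketch (illustrative, not part of the original gist) ---
// A hypothetical entry file (e.g. index.tsx) that mounts the recognizer and
// downloads the finished recording. The "recording.webm" file name and the
// #root mount node are assumptions for demonstration.
import ReactDOM from "react-dom";

function App() {
  return (
    <SpeechRecognizer
      onBlobUpdated={(blob, records) => {
        // records carries one {start, end, results} entry per utterance,
        // in milliseconds relative to the recording start.
        console.log(`utterances: ${records.length}, audio: ${blob.size} bytes`);
        const url = URL.createObjectURL(blob);
        const a = document.createElement("a");
        a.href = url;
        a.download = "recording.webm";
        a.click();
        URL.revokeObjectURL(url);
      }}
    />
  );
}

ReactDOM.render(<App />, document.getElementById("root"));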

utils.ts

// Speech
export type SpeechRecord = {
  results: SpeechRecognitionResult[];
  start: number;
  end: number;
};

export type SpeechRecognitionResult = {
  transcript: string;
  confidence: number;
};
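
// Minimal hand-rolled typing for the browser's SpeechRecognition API,
// which the default TypeScript DOM lib does not declare.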
declare class SpeechRecognitionClass {
  lang: string;
  continuous: boolean;
  interimResults: boolean;
  stop(): void;
  abort(): void;
  onresult: (event: {
    results: Array<Array<SpeechRecognitionResult>>;
  }) => void;
  onend: (event: any) => void;
  onaudiostart: (event: any) => void;
  onaudioend: (event: any) => void;
  onspeechstart: (event: any) => void;
  onspeechend: (event: any) => void;
  onnomatch: (event: any) => void;
  onerror: (event: any) => void;
  start: () => void;
}

export const SpeechRecognition: typeof SpeechRecognitionClass =
  (global as any).webkitSpeechRecognition || (global as any).SpeechRecognition;

// let currentRecognition: SpeechRecognitionClass | null = null;
export function createSpeechRecognition(opts: {
  lang?: string;
  onResult: (results: SpeechRecognitionResult[]) => void;
  onEnd: (
    results: SpeechRecognitionResult[],
    range: { start: number; end: number }
  ) => void;
}) {
  const recognition = new SpeechRecognition();
  recognition.lang = opts.lang || "ja-JP";
  recognition.interimResults = true;

  let currentInputResults: SpeechRecognitionResult[] = [];
  recognition.onresult = event => {
    console.log("SpeechRecognition: onresult", event);
    const r: SpeechRecognitionResult[] = [];
    Array.from(event.results).forEach(xr => {
      r.push(...Array.from(xr));
    });
    currentInputResults = r;
    opts.onResult(r);
  };

  let defaultStart = Date.now();
  let start: number | null = null;
  let end: number | null = null;
  recognition.onspeechstart = (_event: any) => {
    // only record the first speech start of this session
    if (start == null) {
      start = Date.now();
    }
    console.log("SpeechRecognition: onspeechstart");
  };
  recognition.onspeechend = (_event: any) => {
    end = Date.now();
    console.log("SpeechRecognition: onspeechend");
  };
  recognition.onnomatch = (_event: any) => {
    console.log("SpeechRecognition: nomatch");
  };
  recognition.onend = (event: any) => {
    console.log("SpeechRecognition: end");
    opts.onEnd(currentInputResults, {
      start: start || defaultStart,
      end: end || defaultStart
    });
  };
  recognition.onerror = (_event: any) => {
    console.log("SpeechRecognition: onerror");
  };
  return recognition;
}
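
// Usage sketch (illustrative, not part of the original gist): run a single
// recognition session and log interim hypotheses and the final transcript.
// The name demoSpeechRecognition is hypothetical.
export function demoSpeechRecognition() {
  const recognition = createSpeechRecognition({
    lang: "ja-JP",
    onResult(results) {
      // interim hypotheses arrive while the user is still speaking
      console.log("interim:", results.map(r => r.transcript).join(" / "));
    },
    onEnd(results, range) {
      const seconds = (range.end - range.start) / 1000;
      console.log(`final after ${seconds}s:`, results.map(r => r.transcript));
    }
  });
  recognition.start();
}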

// media recorder
declare var MediaRecorder: any;

export async function createMediaRecorder(listeners: {
  onRecordEnd: (blob: Blob) => void;
  onData: (chunks: Blob[]) => void;
}): Promise<{ start: Function; stop: Function }> {
  const stream = await navigator.mediaDevices.getUserMedia({
    audio: true,
    video: false
  });
  const codec = "audio/webm";
  const recorder = new MediaRecorder(stream, {
    audioBitsPerSecond: 128000, // 128kbps
    mimeType: codec
  });
  const chunks: Array<Blob> = [];
  recorder.addEventListener("dataavailable", (ev: { data: Blob }) => {
    chunks.push(ev.data);
    listeners.onData(chunks);
  });
  recorder.addEventListener("stop", () => {
    const blob = new Blob(chunks, { type: codec });
    listeners.onRecordEnd(blob);
  });
  return recorder;
}
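
// Usage sketch (illustrative, not part of the original gist): record a few
// seconds of microphone audio. Note that without a timeslice argument to
// start(), "dataavailable" typically fires only when the recorder stops.
// The name demoMediaRecorder is hypothetical.
export async function demoMediaRecorder() {
  const recorder = await createMediaRecorder({
    onData(chunks) {
      console.log(`chunks so far: ${chunks.length}`);
    },
    onRecordEnd(blob) {
      console.log(`recorded ${blob.size} bytes of ${blob.type}`);
    }
  });
  recorder.start();
  setTimeout(() => recorder.stop(), 3000);
}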