Skip to content

Instantly share code, notes, and snippets.

@si3mshady
Created June 4, 2024 13:19
Show Gist options
  • Save si3mshady/ec783d5a89e95088fe23c30c5d3dd72e to your computer and use it in GitHub Desktop.
React code with D-ID avatar
import React, { useState, useEffect, useRef } from 'react';
// import ReactDOM from 'react-dom';
import trinity from './trinity.mp4'
import './App.css';
function App() {
// Retry policy used by fetchWithRetries: max attempts and the cap (seconds)
// on the exponential-backoff delay.
const maxRetryCount = 3;
const maxDelaySec = 4;
// Opening line the avatar speaks; overridable via environment variable.
const REACT_APP_INIT_SCRIPT = process.env.REACT_APP_INIT_SCRIPT || "Hey Elliott, what are we going to work on today? Wait, why do I even need to ask I already know it will be fun";
// Text sent to the D-ID talk endpoint by handleTalk.
const [script,setScript] = useState(REACT_APP_INIT_SCRIPT)
// Free-text prompt forwarded to the RAG backend by promptQuestionGetResponse.
const [prompt,setPrompt] = useState('')
const [isVideoPlaying, setIsVideoPlaying] = useState(false); // Add this state variable
// Voice list fetched from the D-ID TTS API and the currently selected voice id.
const [voices, setVoices] = useState([]);
const [voice, setVoice] = useState('en-US-NancyNeural');
// Set to the dialogue length by createTalk; rough proxy for speaking time.
const [timer,setTimer] = useState(0)
// Source image URL used when creating a D-ID stream (handleConnect).
const [imageUrl, setImageUrl] = useState(null);
// Speech-recognition state: whether we are listening and the live transcript.
const [listening, setListening] = useState(false);
const [spokenText, setSpokenText] = useState('');
// Single <video> element shared by the idle clip and the live stream.
const talkVideoRef = useRef(null);
const idleVideoRef = useRef(null); // Ref for the idle video
const REACT_APP_DID_API_KEY = process.env.REACT_APP_DID_API_KEY || "";
const REACT_APP_SERVER_URL = process.env.REACT_APP_SERVER_URL || "http://localhost:5000/query";
const videoRef = useRef(null);
// Holds the active webkitSpeechRecognition instance (see the effect below).
const recognitionRef = useRef(null);
// Toggled to re-run the idle-video effect; the flag itself is not rendered.
const [isVideoVisible, setIsVideoVisible] = useState(false);
useEffect(() => {
  // Whenever the visibility flag toggles, (re)point the shared video element
  // at the bundled idle clip and detach any WebRTC stream attached to it.
  const idleEl = document.getElementById('idle-video');
  idleEl.src = trinity;
  idleEl.muted = true;
  idleEl.playsInline = true;
  idleEl.srcObject = undefined;
}, [isVideoVisible]);
// Mark the idle video as visible; the effect keyed on isVideoVisible then
// reloads the idle clip into the shared video element.
const playIdleVideo = () => setIsVideoVisible(true);
// Mark the idle video as hidden (counterpart to playIdleVideo).
const hideIdleVideo = () => setIsVideoVisible(false);
useEffect(() => {
  // On mount: start browser speech recognition (if supported) and fetch the
  // list of non-premium D-ID TTS voices for the voice <select>.
  async function fetchData() {
    const startSpeechRecognition = (SpeechRecognitionImpl) => {
      const recognition = new SpeechRecognitionImpl();
      recognition.continuous = true;
      recognition.interimResults = true;
      recognitionRef.current = recognition;
      recognitionRef.current.onresult = handleSpeechRecognitionResult;
      recognitionRef.current.start(); // Start recognition
    };
    // Bug fix: `new window.webkitSpeechRecognition() || new window.SpeechRecognition()`
    // would throw before `||` could evaluate when the webkit constructor is
    // missing, and the standard SpeechRecognition was never reachable.
    // Resolve whichever constructor exists first, then instantiate it.
    const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (SpeechRecognitionImpl) {
      startSpeechRecognition(SpeechRecognitionImpl);
    } else {
      console.log('Speech recognition not supported');
    }
    const url = 'https://api.d-id.com/tts/voices';
    const headers = {
      'accept': 'application/json',
      'Authorization': `Basic ${REACT_APP_DID_API_KEY}`
    };
    try {
      const response = await fetch(url, { method: 'GET', headers: headers });
      const data = await response.json();
      // Guard against error payloads: only filter when the API returned an array.
      setVoices(Array.isArray(data) ? data.filter(element => element.access !== 'premium') : []);
    } catch (error) {
      console.error('Error fetching data:', error);
    }
  }
  fetchData();
  // Cleanup: stop recognition when the component unmounts.
  return () => {
    if (recognitionRef.current) {
      recognitionRef.current.stop();
      recognitionRef.current = null;
    }
  };
}, []);
// Concatenate the best alternative of every recognition segment received so
// far and publish the combined transcript.
const handleSpeechRecognitionResult = (event) => {
  const transcript = Array.from(event.results, (segment) => segment[0].transcript).join('');
  setSpokenText(transcript);
};
// Flip the recognizer between listening and idle, mirroring the change into
// the `listening` state flag. No-op when recognition was never initialised.
const toggleListening = () => {
  const recognition = recognitionRef.current;
  if (!recognition) return;
  if (listening) {
    recognition.stop();
  } else {
    recognition.start();
  }
  setListening((wasListening) => !wasListening);
};
// Vendor-prefixed fallback for older browsers; bound to window so it can be
// invoked as a constructor.
const RTCPeerConnection =
(window.RTCPeerConnection ||
window.webkitRTCPeerConnection ||
window.mozRTCPeerConnection).bind(window);
// NOTE(review): these are plain `let` bindings inside the component body, so
// they are re-created (reset to undefined) on every re-render. Values written
// by the handlers below survive only as long as no state update re-renders
// App — streamId/sessionId are additionally persisted to localStorage as a
// workaround. Consider useRef for these; verify against render behaviour.
let peerConnection;
let streamId;
let sessionId;
let sessionClientAnswer;
let statsIntervalId;
let videoIsPlaying;
let lastBytesReceived;
let stream;
let session;
// Add other variable declarations here
// Add event handlers using JSX syntax
// Create a new D-ID streaming session and complete the WebRTC offer/answer
// exchange. No-op when a connection is already established.
const handleConnect = async () => {
  console.log('Clicked')
  if (peerConnection && peerConnection.connectionState === 'connected') {
    return;
  }
  stopAllStreams();
  closePC();
  const sessionResponse = await fetchWithRetries("https://api.d-id.com/talks/streams", {
    method: 'POST',
    headers: {
      Authorization: `Basic ${REACT_APP_DID_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      source_url: imageUrl,
    }),
  });
  console.log(sessionResponse)
  // Robustness fix: bail out instead of destructuring an error payload when
  // the API rejects the request (bad key, bad image URL, ...).
  if (!sessionResponse.ok) {
    console.error('Failed to create D-ID stream:', sessionResponse.status);
    return;
  }
  const { id: newStreamId, offer, ice_servers: iceServers, session_id: newSessionId } = await sessionResponse.json();
  streamId = newStreamId;
  sessionId = newSessionId;
  // Persisted so promptQuestionGetResponse can recover the ids after the
  // module-level `let` bindings are reset by a re-render.
  localStorage.setItem('streamId', newStreamId);
  localStorage.setItem('sessionId', newSessionId);
  try {
    sessionClientAnswer = await createPeerConnection(offer, iceServers);
  } catch (e) {
    console.log('error during streaming setup', e);
    stopAllStreams();
    closePC();
    return;
  }
  // Send our SDP answer back to D-ID. (The response was previously bound to
  // an unused `sdpResponse` local; the binding has been removed.)
  await fetch(`https://api.d-id.com/talks/streams/${streamId}/sdp`, {
    method: 'POST',
    headers: {
      Authorization: `Basic ${REACT_APP_DID_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      answer: sessionClientAnswer,
      session_id: sessionId,
    }),
  });
};
// Stop every track of the remote stream attached to the talk video element
// and detach it. Safe to call when the ref or stream is absent.
const stopAllStreams = () => {
  const videoEl = talkVideoRef.current;
  if (!videoEl || !videoEl.srcObject) return;
  console.log('Stopping video streams');
  for (const track of videoEl.srcObject.getTracks()) {
    track.stop();
  }
  videoEl.srcObject = null;
};
// Release the RTCPeerConnection and the stats-polling interval started by
// onTrack. The original left this as an empty stub, leaking both whenever
// handleConnect rebuilt the session.
const closePC = () => {
  if (statsIntervalId) {
    clearInterval(statsIntervalId);
    statsIntervalId = undefined;
  }
  if (peerConnection) {
    peerConnection.close();
    peerConnection = undefined;
  }
}
/**
 * fetch wrapper that retries on network failure with jittered exponential
 * backoff (capped at maxDelaySec), giving up after maxRetryCount attempts.
 * @param {string} url - request URL
 * @param {object} options - fetch options
 * @param {number} [retries=1] - current attempt number (internal)
 * @returns {Promise<Response>}
 * @throws {Error} once the retry budget is exhausted
 */
async function fetchWithRetries(url, options, retries = 1) {
  try {
    return await fetch(url, options);
  } catch (err) {
    // Guard clause: out of attempts — surface the last error.
    if (retries > maxRetryCount) {
      throw new Error(`Max retries exceeded. error: ${err}`);
    }
    const delaySec = Math.min(Math.pow(2, retries) / 4 + Math.random(), maxDelaySec);
    await new Promise((resolve) => setTimeout(resolve, delaySec * 1000));
    console.log(`Request failed, retrying ${retries}/${maxRetryCount}. Error ${err}`);
    return fetchWithRetries(url, options, retries + 1);
  }
}
// Lazily create the RTCPeerConnection, wire up all signalling/media event
// handlers, then perform the answer half of the SDP exchange.
// Returns the local SDP answer to be POSTed back to D-ID by the caller.
async function createPeerConnection(offer, iceServers) {
if (!peerConnection) {
peerConnection = new RTCPeerConnection({ iceServers });
// Listeners are registered before any SDP is applied so no early
// ICE/track events are missed. (Third arg `true` = capture; harmless here.)
peerConnection.addEventListener('icegatheringstatechange', onIceGatheringStateChange, true);
peerConnection.addEventListener('icecandidate', onIceCandidate, true);
peerConnection.addEventListener('iceconnectionstatechange', onIceConnectionStateChange, true);
peerConnection.addEventListener('connectionstatechange', onConnectionStateChange, true);
peerConnection.addEventListener('signalingstatechange', onSignalingStateChange, true);
peerConnection.addEventListener('track', onTrack, true);
}
// Standard answerer flow: remote offer -> create answer -> set local answer.
await peerConnection.setRemoteDescription(offer);
console.log('set remote sdp OK');
const sessionClientAnswer = await peerConnection.createAnswer();
console.log('create local sdp OK');
await peerConnection.setLocalDescription(sessionClientAnswer);
console.log('set local sdp OK');
return sessionClientAnswer;
}
// Attach a live WebRTC stream to the talk video element and unmute it.
// Bug fix: the null guard was commented out, so a missing ref or a null
// stream threw a TypeError; the guard is restored.
const setVideoElement = (stream) => {
  if (!stream || !talkVideoRef.current) return;
  talkVideoRef.current.srcObject = stream;
  talkVideoRef.current.loop = false;
  talkVideoRef.current.muted = false;
  // Safari hotfix: autoplay can leave the element paused; kick playback
  // manually and swallow the (expected) autoplay-policy rejection.
  if (talkVideoRef.current.paused) {
    talkVideoRef.current.play()
      .then(_ => {})
      .catch(e => {});
  }
};
// const playIdleVideo = () => {
// if (!idleVideoRef.current) return;
// // Set the state variable to indicate that the video is playing
// setIsVideoPlaying(false);
// // Set the source and play the idle video
// talkVideoRef.current.src = trinity;
// // idleVideoRef.current.load(); // Ensure the video is loaded
// // idleVideoRef.current.play();
// // Set loop and muted properties
// talkVideoRef.current.loop = true;
// idleVideoRef.current.muted = true
// ;
// // test.play()
// };
// Controlled-select handler: store the chosen D-ID voice id.
const handleOption = ({ target }) => setVoice(target.value);
// function onVideoStatusChange(videoIsPlaying, stream) {
// let status;
// if (videoIsPlaying) {
// status = 'streaming';
// const remoteStream = stream;
// setVideoElement(remoteStream);
// setIsVideoPlaying(true); // Set the state variable to indicate that the video is playing
// } else {
// status = 'empty';
// const remoteStream = null;
// setVideoElement(remoteStream);
// setIsVideoPlaying(false); // Set the state variable to indicate that the video is playing
// playIdleVideo();
// }
// }
/**
 * Ask D-ID to speak `dialogue` on an existing stream.
 * @param {string} stream_id - id returned when the stream was created
 * @param {string} session_id - matching session id
 * @param {string} dialogue - text for the avatar to speak
 * @returns {Promise<object>} parsed API response
 */
async function createTalk(stream_id, session_id, dialogue) {
  // Rough "speaking time" proxy derived from the dialogue length.
  setTimer(dialogue.length);
  const response = await fetch(`https://api.d-id.com/talks/streams/${stream_id}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      // "Accept": "application/json",
      Authorization: `Basic ${REACT_APP_DID_API_KEY}`,
    },
    body: JSON.stringify({
      script: {
        input: dialogue,
        type: 'text',
        subtitles: 'false',
        provider: {
          type: 'microsoft',
          voice_id: voice ? voice : 'en-US-JennyNeural',
        },
        ssml: 'false',
      },
      config: {
        fluent: 'false',
        pad_audio: '0.0',
      },
      session_id: session_id,
    }),
  });
  const data = await response.json();
  console.log(data);
  return data;
}
// POST the current `script` text to the active D-ID stream so the avatar
// speaks it. Does nothing unless the peer connection looks ready.
const handleTalk = async () => {
  // connectionState not supported in firefox — also accept a connected ICE state.
  const isReady =
    peerConnection?.signalingState === 'stable' ||
    peerConnection?.iceConnectionState === 'connected';
  if (!isReady) return;
  const payload = {
    script: {
      input: script,
      type: 'text',
      subtitles: 'false',
      provider: {
        type: 'microsoft',
        voice_id: voice ? voice : 'en-US-JennyNeural',
      },
      ssml: 'false',
    },
    config: {
      fluent: 'false',
      pad_audio: '0.0',
    },
    session_id: sessionId,
  };
  const talkResponse = await fetchWithRetries(`https://api.d-id.com/talks/streams/${streamId}`, {
    method: 'POST',
    headers: {
      Authorization: `Basic ${REACT_APP_DID_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(payload),
  });
  console.log(talkResponse);
  // Check if the talk response is ok
  if (talkResponse.ok) {
    console.log('Streaming video should be playing');
  }
};
// Send `question` to the RAG backend, then have the avatar speak whatever
// the backend returns via createTalk.
const promptQuestionGetResponse = async (question) => {
  console.log('Asking the question ', question)
  const response = await fetch(REACT_APP_SERVER_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({ query: question }),
  });
  const data = await response.json();
  console.log(data)
  // Ids were persisted to localStorage by handleConnect, surviving re-renders.
  const stream = localStorage.getItem('streamId');
  const session = localStorage.getItem('sessionId');
  // Bug fix: await the talk instead of letting the promise float, so
  // failures reject this call rather than becoming unhandled rejections.
  await createTalk(stream, session, data);
  // NOTE(review): `data` is passed verbatim as the dialogue — presumably the
  // backend returns plain text; verify against the /query server response.
}
function onTrack(event) {
/**
* The following code is designed to provide information about whether currently there is data
* that's being streamed - It does so by periodically looking for changes in total stream data size
*
* This information in our case is used in order to show idle video while no talk is streaming.
* To create this idle video use the POST https://api.d-id.com/talks endpoint with a silent audio file or a text script with only ssml breaks
* https://docs.aws.amazon.com/polly/latest/dg/supportedtags.html#break-tag
* for seamless results use `config.fluent: true` and provide the same configuration as the streaming video
*/
if (!event.track) return;
// Poll inbound RTP stats every 500ms; growing bytesReceived means the talk
// video is actively streaming.
statsIntervalId = setInterval(async () => {
const stats = await peerConnection.getStats(event.track);
stats.forEach((report) => {
if (report.type === 'inbound-rtp' && report.mediaType === 'video') {
// `>` binds tighter than `!==`: compares the cached flag against the
// fresh "bytes grew since last tick" boolean.
const videoStatusChanged = videoIsPlaying !== report.bytesReceived > lastBytesReceived;
if (videoStatusChanged) {
videoIsPlaying = report.bytesReceived > lastBytesReceived;
onVideoStatusChange(videoIsPlaying, event.streams[0]);
// console.log("Video is playing: status -> ",videoIsPlaying )
}
lastBytesReceived = report.bytesReceived;
}
});
}, 500);
}
// Swap the shared video element back to the bundled idle clip, detaching any
// WebRTC stream that may still be attached.
const playLoopVideo = () => {
  const idleVideoElement = document.getElementById('idle-video');
  Object.assign(idleVideoElement, {
    src: trinity,
    muted: true,
    playsInline: true,
    srcObject: undefined,
  });
};
// Called by onTrack whenever stream data starts or stops flowing: show the
// live WebRTC stream while playing, fall back to the idle clip otherwise.
// Fix: removed the dead `status` local that was assigned but never read.
function onVideoStatusChange(videoIsPlaying, stream) {
  if (videoIsPlaying) {
    setVideoElement(stream);
  } else {
    console.log("Video should have stopped")
    playLoopVideo()
    // setIsVideoVisible(!isVideoVisible)
  }
}
// Diagnostic logging only; the gathering state is not acted upon.
function onIceGatheringStateChange() {
console.log('ice gathering state change')
}
// Forward each local ICE candidate to D-ID's signalling endpoint so the
// remote peer can complete connectivity checks.
function onIceCandidate(event) {
  console.log('onIceCandidate', event);
  if (event.candidate) {
    const { candidate, sdpMid, sdpMLineIndex } = event.candidate;
    // Bug fix: attach a rejection handler — the floating fetch previously
    // surfaced network failures as unhandled promise rejections.
    fetch(`https://api.d-id.com/talks/streams/${streamId}/ice`, {
      method: 'POST',
      headers: {
        Authorization: `Basic ${REACT_APP_DID_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        candidate,
        sdpMid,
        sdpMLineIndex,
        session_id: sessionId,
      }),
    }).catch((err) => console.error('Failed to send ICE candidate:', err));
  }
}
// Tear the session down once ICE reaches a terminal state.
function onIceConnectionStateChange() {
  console.log('ice-state-change')
  const state = peerConnection.iceConnectionState;
  if (state === 'failed' || state === 'closed') {
    stopAllStreams();
    closePC();
  }
}
// Diagnostic logging only; connectionState is not acted upon.
function onConnectionStateChange() {
// not supported in firefox
console.log('connection state change')
}
// Diagnostic logging only; signalingState is not acted upon.
function onSignalingStateChange() {
console.log('signal state change')
}
// function onVideoStatusChange(videoIsPlaying, stream) {
// let status;
// if (videoIsPlaying) {
// status = 'streaming';
// const remoteStream = stream;
// setVideoElement(remoteStream);
// } else {
// status = 'empty';
// playIdleVideo();
// }
// }
// Controlled-input handler: store the avatar source image URL.
const handleImage = ({ target }) => setImageUrl(target.value);
return (
<div id="app" className="page-container">
<div className="card-container">
{/* Single <video> element shared by the idle clip and the live D-ID
stream; both the ref and the 'idle-video' id point at it. */}
<div className="video-container">
<video ref={talkVideoRef} muted loop autoPlay playsInline id="idle-video" width="600" height="600"></video>
</div>
{/* Voice picker populated from the D-ID /tts/voices response. */}
<div style={{marginBottom: ".75rem", display: "flex", justifyContent: "center", flexDirection: "column", textAlign:'center'}}>
<label>Select Voice ID:</label>
<select onChange={handleOption} className="bg-gray-800 p-3 rounded text-gray-300 w-full mb-4">
{voices.filter(element => element.access !== 'premium').map((element) => (
<option key={element.id} value={element.id}>{element.id}</option>
))}
</select>
</div>
{/* Source image used when creating the stream (handleConnect). */}
<div style={{marginBottom: ".75rem", display: "flex", justifyContent: "center", flexDirection: "column", textAlign:'center'}}>
<label>Enter Image URL:</label>
<input type="text" value={imageUrl} onChange={(e) => setImageUrl(e.target.value)} placeholder="Image Url" className="input-field" />
</div>
{/* Typed prompt for the RAG backend (SEND TEXT button). */}
<div style={{marginBottom: "1.5rem", display: "flex", justifyContent: "center", flexDirection: "column", textAlign:'center'}}>
<label>Enter Prompt - RAG Mode:</label>
<input type="text" value={prompt} onChange={(e) => setPrompt(e.target.value)} placeholder="RAG Mode" className="input-field" />
</div>
{/* Field input section updated with the spoken text */}
<div style={{ marginBottom: '1.5rem', display: 'flex', justifyContent: 'center', flexDirection: 'column', textAlign: 'center' }}>
<label>Spoken Text:</label>
<input type="text" value={spokenText} readOnly className="input-field" />
</div>
<div className="button-container">
<div >
<button className="button connect-button" style={{ marginRight: '1rem' }} type="button" onClick={handleConnect}>Connect</button>
<button className="button startsession-button" style={{ marginRight: '1rem' }} onClick={handleTalk} type="button">Start Session</button>
<button className="button talk-button" style={{ marginRight: '1rem' }} type="button" onClick={() => promptQuestionGetResponse(spokenText)}>ASK</button>
<button className="button talk-button" style={{ marginRight: '1rem' }} type="button" onClick={() => promptQuestionGetResponse(prompt)}>SEND TEXT</button>
{/* Button to toggle listening */}
<button className="button" style={{ marginRight: '1rem' }} onClick={toggleListening} type="button">
{listening ? 'Stop Listening' : 'Start Listening'}
</button>
<div style={{ display: 'flex', flexDirection: 'column', marginLeft: '1rem' }}>
{/* <input type="file" onChange={handleFileInputChange} accept=".jpg,.jpeg,.png,.gif" /> */}
</div>
{/* <video
ref={videoRef}
autoPlay
style={{
display: 'none',
visibility: 'hidden',
opacity: 0,
zIndex: -1,
}}
/> */}
</div>
</div>
</div>
</div>
);
}
export default App;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment