-
-
Save arzga/da22da22782e0b79c2271ed0f206d6df to your computer and use it in GitHub Desktop.
import React, { useCallback, useEffect, useState } from "react"; | |
import { | |
SpeechSegment, | |
SpeechProvider, | |
useSpeechContext, | |
} from "@speechly/react-client"; | |
import { | |
BigTranscript, | |
BigTranscriptContainer, | |
PushToTalkButton, | |
PushToTalkButtonContainer, | |
} from "@speechly/react-ui"; | |
/** Power state (true = on) of every device in one room, keyed by device name. */
type DeviceStates = {
  [device: string]: boolean;
};

/** Map from room name to a per-room payload of type `T`. */
type Rooms<T> = {
  [room: string]: T;
};

/** Whole application state: each room together with its devices' power states. */
type AppState = {
  rooms: Rooms<DeviceStates>;
};

/**
 * Initial application state: three rooms with every device switched off.
 *
 * Annotated as `AppState` so the literal is validated where it is declared
 * rather than at the point where it is first passed to `useState<AppState>`.
 * Room and device names are lower case because the speech handler lower-cases
 * entity values before looking them up here. The key order is the order in
 * which rooms and devices are rendered.
 */
const DefaultAppState: AppState = {
  rooms: {
    "living room": {
      radio: false,
      television: false,
      lights: false,
    },
    bedroom: {
      radio: false,
      lights: false,
    },
    kitchen: {
      radio: false,
      lights: false,
    },
  },
};
/**
 * Application root.
 *
 * Wraps the transcript display, the voice-controlled home view and the
 * push-to-talk button in a single `SpeechProvider`, so that all of them
 * share the same speech context.
 */
export default function App() {
  // Settings handed to the speech provider.
  const speechlyAppId = "a14e42a3-917e-4a57-81f7-7433ec71abad";
  const speechLanguage = "en-US";

  // Transcript of the ongoing utterance.
  const transcript = (
    <BigTranscriptContainer>
      <BigTranscript />
    </BigTranscriptContainer>
  );

  // Push-to-talk control; its capture key is a single space character.
  const pushToTalk = (
    <PushToTalkButtonContainer>
      <PushToTalkButton captureKey=" " />
    </PushToTalkButtonContainer>
  );

  return (
    <div className="App">
      <SpeechProvider appId={speechlyAppId} language={speechLanguage}>
        {transcript}
        <SpeechlyApp />
        {pushToTalk}
      </SpeechProvider>
    </div>
  );
}
/**
 * Returns true only when `key` is an own property of `obj`.
 * Inherited members such as `constructor` are deliberately not matched.
 */
function hasOwnKey(obj: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Renders the power label of a single device.
 *
 * @param isOn     power state in the committed app state
 * @param willBeOn power state in the tentative app state
 * @returns "On"/"Off" when both states agree, otherwise the transitional
 *          "Turning on..."/"Turning off..." label
 */
function renderPowerStatus(isOn: boolean, willBeOn: boolean) {
  if (isOn === willBeOn) {
    return isOn ? (
      <span style={{ color: "green" }}>On</span>
    ) : (
      <span style={{ color: "red" }}>Off</span>
    );
  }
  return willBeOn ? (
    <span style={{ color: "green" }}>Turning on...</span>
  ) : (
    <span style={{ color: "red" }}>Turning off...</span>
  );
}

/**
 * Voice-controlled view of the rooms and their devices.
 *
 * Two copies of the state are kept:
 *  - `appState` is the committed state, updated only when a segment is final;
 *  - `tentativeAppState` is the committed state with the current segment
 *    applied on top of it, updated on every segment.
 * The difference between the two is rendered as "Turning on/off...".
 * While a segment is in progress, the room and device it mentions are
 * highlighted in cyan.
 */
function SpeechlyApp() {
  const { segment } = useSpeechContext();
  const [tentativeAppState, setTentativeAppState] = useState<AppState>(DefaultAppState);
  const [appState, setAppState] = useState<AppState>(DefaultAppState);
  const [selectedRoom, setSelectedRoom] = useState<string | undefined>();
  const [selectedDevice, setSelectedDevice] = useState<string | undefined>();

  // This effect is fired whenever there's a new speech segment available
  useEffect(() => {
    if (segment) {
      const alteredState = alterAppState(segment);
      // Set current app state
      setTentativeAppState(alteredState);
      if (segment.isFinal) {
        // Store the final app state as basis of next utterance
        setAppState(alteredState);
        // The utterance is over: drop the highlight set by alterAppState
        setSelectedRoom(undefined);
        setSelectedDevice(undefined);
      }
    }
    // The effect is intentionally keyed on `segment` only. `alterAppState` is
    // recreated whenever `appState` changes, so listing it here would re-run
    // the effect after every committed state change.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [segment]);

  // Create a modified app state by applying the speech segment info to the base state.
  // Side effect: updates the highlighted room/device for turn_on/turn_off intents.
  // Returns the base state unchanged for other intents, or when the room/device
  // pair is missing or not present in the base state.
  const alterAppState = useCallback(
    (segment: SpeechSegment): AppState => {
      console.log(segment);
      switch (segment.intent.intent) {
        case "turn_on":
        case "turn_off": {
          // Get values for room and device entities. Note that values are UPPER CASE by default.
          const room = segment.entities
            .find((entity) => entity.type === "room")
            ?.value.toLowerCase();
          const device = segment.entities
            .find((entity) => entity.type === "device")
            ?.value.toLowerCase();
          setSelectedRoom(room);
          setSelectedDevice(device);
          // Set desired device powerOn based on the intent
          const isPowerOn = segment.intent.intent === "turn_on";
          // Only own keys count as known rooms/devices; a plain `!== undefined`
          // test would also accept inherited members such as "constructor".
          const knownDevices =
            room && hasOwnKey(appState.rooms, room)
              ? appState.rooms[room]
              : undefined;
          if (room && device && knownDevices && hasOwnKey(knownDevices, device)) {
            return {
              ...appState,
              rooms: {
                ...appState.rooms,
                [room]: { ...knownDevices, [device]: isPowerOn },
              },
            };
          }
          break;
        }
      }
      return appState;
    },
    [appState]
  );

  // Render the app state as outlined boxes representing rooms with devices in them
  return (
    <div
      style={{
        display: "flex",
        height: "100vh",
        flexDirection: "row",
        justifyContent: "center",
        alignItems: "center",
        alignContent: "center",
        flexWrap: "wrap",
      }}
    >
      {Object.entries(appState.rooms).map(([room, devices]) => (
        <div
          key={room}
          style={{
            width: "12rem",
            height: "12rem",
            padding: "0.5rem",
            borderWidth: "2px",
            borderStyle: "solid",
            borderColor: selectedRoom === room ? "cyan" : "black",
          }}
        >
          {room}
          <div
            style={{
              paddingTop: "1rem",
              display: "flex",
              flexDirection: "row",
              justifyContent: "start",
              alignItems: "start",
              flexWrap: "wrap",
            }}
          >
            {Object.keys(devices).map((device) => (
              <div
                key={device}
                style={{
                  flexBasis: "5rem",
                  margin: "0.2rem",
                  padding: "0.2rem",
                  // A device named without a room is highlighted in every room that has it
                  background:
                    selectedDevice === device &&
                    (!selectedRoom || selectedRoom === room)
                      ? "cyan"
                      : "lightgray",
                }}
              >
                {device}
                <br />
                {renderPowerStatus(
                  Boolean(devices[device]),
                  Boolean(tentativeAppState.rooms[room]?.[device])
                )}
              </div>
            ))}
          </div>
        </div>
      ))}
    </div>
  );
}
alterAppState is very reducer-like. It could actually be a reducer, but then it would not be able to directly trigger any side effects such as animations or transitions (which are not showcased in this example anyway).
This example could be improved by calling `setBaseState` at the start of a new utterance. The current approach, which calls `setBaseState` at the end of the utterance, will not work gracefully with a GUI, as the old app state is restored upon starting a new utterance. Any app state changes made using the GUI may be lost unless the base state is also updated with `setBaseState`.
Refactored app states into `appState` (formerly `baseAppState`) and `tentativeAppState` (formerly `appState`). Modified the rendering to display transitional states (turning on/off). Added visualisation of the selected room/device during the tentative state using cyan color.
Please note that it's currently possible that the transitional state visualisation may go unnoticed if the tentative period is very short.
Is this supported in React version 18? Both react-speech and react-speech-kit are not working for me in React version 18.
Example utterances:
"Turn off lights in the living room"
"Turn on lights in the bedroom"
You'll notice that nothing will happen if you leave out a part of the sentence. This example can (and probably should) be improved by allowing the user to specify the key information (the room, device and power state) spread over multiple utterances. This would make the voice experience more flexible and more pleasant to use.