#!/bin/sh
# sendmail-gcloud
#
# Installation instructions
# Copy the content of this file to /usr/sbin/sendmail-gcloud
#
# Google Account
# ---------------
# Create a Google Cloud account if you don't have one yet. Free trial is available at https://console.cloud.google.com/freetrial
# Within console.cloud.google.com search for Cloud Speech-to-Text API and enable it
#
# From the Linux command line on the FreePBX machine
# -------------------------------------------
# Follow steps 1 and 2 of the instructions on Google Cloud https://cloud.google.com/sdk/docs/downloads-yum
# Run the following commands on FreePBX:
# cd /usr/sbin/
# chown asterisk:asterisk sendmail-gcloud
# chmod 744 sendmail-gcloud
# chmod 755 /usr/bin/dos2unix
#
# Verify that you have the following (by simply running the command) and if not use yum install:
# jq
# sox
# flac
# lame
# dos2unix -V
# Ensure dos2unix is executable by the asterisk user (mode 755 is sufficient; never use 777, which would let any local user replace the binary)
#
# Connect FreePBX to Google Cloud
# su asterisk
# gcloud auth login
# The CLI will print a URL. Copy it and paste it into your browser. Google will give you a verification code; paste it back into the CLI, which is waiting for it.
#
# Open FreePBX web interface
# Go to Settings > Voicemail Admin > Settings > Email Config
# Change Mail Command to: /usr/sbin/sendmail-gcloud
# Submit and apply changes
#
# Original source created by N. Bernaerts: https://github.com/NicolasBernaerts/debian-scripts/tree/master/asterisk
# modified per: https://jrklein.com/2015/08/17/asterisk-voicemail-transcription-via-ibm-bluemix-speech-to-text-api/
# modified per: https://gist.github.com/lgaetz/2cd9c54fb1714e0d509f5f8215b3f5e6
# current version: https://gist.github.com/tony722/7c6d86be2e74fa10a1f344a4c2b093ea
#
# Notes: This is a script modified from the original to work with FreePBX so that email notifications sent from
# Asterisk voicemail contain a speech to text transcription provided by Google Cloud Speech API
#
# License: There are no explicit license terms on the original script or on the blog post with modifications
# I'm assuming GNU/GPL2+ unless notified otherwise by copyright holder(s)
#
# Version History:
# 2021-05-06 Add fix by dcat127: trim flac file to 59 seconds
# 2020-08-27 Add fix by chrisduncansn
# Minor edit in instruction wording
# 2020-05-27 Add instructions from sr10952
# Add export fix by levishores
# 2019-02-27 Initial commit by tony722
# ---------------------------------------------------------------------------
# Mail filter body.
#
# Reads one complete e-mail from stdin. If the message has a MIME boundary and
# an attachment part, the attachment is decoded, re-encoded as MP3, sent to
# Google Cloud Speech for transcription, and the message is rebuilt with the
# transcription appended to the text body. Otherwise the message is passed
# through unchanged. The result is handed to `sendmail -t`.
#
# Exit status: that of `sendmail -t` (1 if the scratch directory cannot be
# created or entered, or if the script is interrupted by a signal).
# ---------------------------------------------------------------------------

# set PATH
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# remember the starting directory (pushd/popd are bash builtins and are not
# available in every /bin/sh, so plain cd is used instead)
start_dir=$(pwd)

# create a private scratch directory; abort if that fails, because everything
# below writes fixed file names into the current directory
work_dir=$(mktemp -d) || exit 1

# leave and remove the scratch directory on every exit path
cleanup() {
  cd "$start_dir" 2>/dev/null || cd /
  rm -rf -- "${work_dir:?}"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

cd "$work_dir" || exit 1

# dump the incoming message (stdin) to a file
cat > stream.org

# get the boundary: second double-quote-delimited field of the first line
# that contains boundary=
boundary=$(grep 'boundary=' stream.org | head -n 1 | cut -d'"' -f 2)

has_attachment=0
if [ -n "$boundary" ]; then
  # cut the original stream into parts, starting a new file at every line
  # that contains the boundary string
  #  stream.part  - header before the boundary
  #  stream.part1 - header after the boundary
  #  stream.part2 - body of the message
  #  stream.part3 - attachment in base64 (WAV file)
  #  stream.part4 - footer of the message
  # The boundary is matched as a literal substring (index), not as a regex,
  # so characters such as . + ? ( ) in it cannot change what is matched.
  awk -v marker="$boundary" '
    index($0, marker) { part++ }
    { print > ("stream.part" part) }
  ' stream.org

  if [ -f stream.part3 ]; then
    has_attachment=1
  fi
fi

if [ "$has_attachment" -eq 0 ]; then
  # no boundary or no attachment part: send the original stream untouched
  mv stream.org stream.new
else
  # cut the attachment into parts; the first 6 lines are treated as its header
  #  stream.part3.wav.head   - header of attachment
  #  stream.part3.wav.base64 - wav file of attachment (encoded base64)
  sed '7,$d' stream.part3 > stream.part3.wav.head
  sed '1,6d' stream.part3 > stream.part3.wav.base64

  # convert the base64 file to a wav file
  dos2unix -o stream.part3.wav.base64
  base64 -di stream.part3.wav.base64 > stream.part3.wav

  # convert the wav file to FLAC for the speech API; only the first 59
  # seconds are kept because the synchronous recognize call rejects audio
  # longer than one minute
  sox -G stream.part3.wav --channels=1 --bits=16 --rate=8000 stream.part3.flac trim 0 59

  # convert to MP3 and build the replacement attachment part; if the
  # conversion fails (e.g. lame is not installed) keep the original WAV
  # attachment instead of mailing an empty, unplayable MP3
  if sox stream.part3.wav stream.part3-pcm.wav &&
     lame -m m -b 24 stream.part3-pcm.wav stream.part3.mp3 &&
     [ -s stream.part3.mp3 ]; then
    base64 stream.part3.mp3 > stream.part3.mp3.base64
    # create mp3 mail part: rewrite the MIME type and the file extension
    sed -e 's/x-[wW][aA][vV]/mpeg/g' -e 's/\.[wW][aA][vV]/.mp3/g' \
      stream.part3.wav.head > stream.part3.new
    dos2unix -o stream.part3.new
    unix2dos -o stream.part3.mp3.base64
    cat stream.part3.mp3.base64 >> stream.part3.new
  else
    cp stream.part3 stream.part3.new
  fi

  # save voicemail in tmp folder in case of trouble
  # TMPMP3=$(mktemp -u /tmp/msg_XXXXXXXX.mp3)
  # cp "stream.part3.mp3" "$TMPMP3"

  # point gcloud at the asterisk user's credentials
  export CLOUDSDK_CONFIG=/home/asterisk/.config/gcloud
  result=$(gcloud ml speech recognize stream.part3.flac --language-code='en-US')
  transcript=$(printf '%s\n' "$result" | jq -r '.results[].alternatives[].transcript')

  # rebuild the message: headers, body without its last line, transcription
  # section, the body's last line, then the attachment
  {
    cat stream.part stream.part1
    sed '$d' stream.part2
    # beginning of transcription section
    printf '\n%s\n' '--- Google transcription result ---'
    # append result of transcription
    if [ -z "$transcript" ]; then
      printf '%s\n' '(Google was unable to recognize any speech in audio data.)'
    else
      printf '%s\n' "$transcript"
    fi
    # end of message body
    tail -n 1 stream.part2
    # add converted attachment
    cat stream.part3.new
  } > stream.new

  # append end of mail body, converting it to LF only
  {
    printf '\n\n'
    cat stream.part4
  } > stream.tmp
  dos2unix -o stream.tmp
  cat stream.tmp >> stream.new
fi

# send the mail thru sendmail and report its status to the caller; the EXIT
# trap returns to the original directory and removes the scratch directory
sendmail -t < stream.new
exit $?
I was actually able to debug it just by disabling the deletion of the temp files, so I could see roughly at which step it stops, although it was much more difficult to troubleshoot that way... I wrote the guide above, which actually worked for me in production (latest version of FreePBX and gcloud).
@sr10952. I've incorporated your instructions into the original gist as comments so they can be seen too. Thanks!
@levishores, I've added the export line to the gist too. Thanks for your contrib. :-)
Hi there, I hope you guys can help. I tried to implement this script but I'm having issues, and I don't really know what's wrong.
I tried to debug it, and this is what I'm getting in my temp directory when I read stream.new:
Subject: FreePBX Voicemail Notification
Message-ID: Asterisk-10-1085661504-5555-2888@domain
X-Asterisk-CallerID: 1243
X-Asterisk-CallerIDName: Alejandro Cardenas
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
I was also getting the template text, which I removed in order to do the testing.
I can see that my project is connected(apparently)
You are now logged in as [myemailaccount].
Your current project is [None]. You can change this setting by running:
$ gcloud config set project PROJECT_ID
[asterisk@pbx sbin]$ gcloud config set project my project id
Updated property [core/project].
[asterisk@pbx sbin]$
What else i can do to test this. Any suggestion?
Thanks in advance
FYI: since I do not have a commercial license (or pay for any license), I use Postfix to send SMTP emails through Google; that part works great.
I've been using this script since late 2019 (thank you @tony722) and in recent months it's started to return Google was unable to recognize any speech in audio data. So I added the latest changes provided by @sr10952 and @levishores. Still not working. So, I debugged line by line and determined I was getting a transcription back from Google. The failure was here:
if [ -z "$FILTERED" ]
The extra space before the closing bracket was triggering the if statement. The odd thing is this extra space has always been in @tony722's script. Removing it fixed it though.
@Acpek23 give that a try. If it doesn't work, see if Google is returning a transcription: login as the asterisk user and run
RESULT=gcloud ml speech recognize stream.part3.flac --language-code='en-US'
echo $RESULT
Thanks for updating the script @tony722!
Thanks @chrisduncansn for finding that!
I've been using this script since late 2019 (thank you @tony722) and in recent months it's started to return Google was unable to recognize any speech in audio data. So I added the latest changes provided by @sr10952 and @levishores. Still not working. So, I debugged line by line and determined I was getting a transcription back from Google. The failure was here:
if [ -z "$FILTERED" ]
The extra space before the closing bracket was triggering the if statement. The odd thing is this extra space has always been in @tony722's script. Removing it fixed it though.
@Acpek23 give that a try. If it doesn't work, see if Google is returning a transcription: login as the asterisk user and run
RESULT=
gcloud ml speech recognize stream.part3.flac --language-code='en-US'
echo $RESULT
Unfortunately I'm still unable to use this; I'm getting nothing.
I changed if [ -z "$FILTERED" ] -- same issue.
I also tried to save the .mp3 to a temp file to see if the conversion was successful; nothing.
save voicemail in tmp folder in case of trouble
TMPMP3=$(mktemp -u /tmp/msg_XXXXXXXX.mp3)
cp "stream.part3.mp3" "$TMPMP3"
I also tried this:
[asterisk@pbx sbin]$ RESULT=gcloud ml speech recognize stream.part3.flac --language-code='en-US'
bash: ml: command not found
I ran: gcloud ml speech recognize stream.part3.flac --language-code='en-US' 1>result 2>error
The $RESULT is create but is empty
Any other suggestion?
Thanks again!!
[asterisk@pbx sbin]$ RESULT=gcloud ml speech recognize stream.part3.flac --language-code='en-US'
bash: ml: command not found
You're missing a space between =gcloud
now im getting this error: ERROR: (gcloud.ml.speech.recognize) Invalid audio source [stream.part3.flac]. The source must either be a local path or a Google Cloud Storage URL (such as gs://bucket/object).
Any suggestion?
RESULT=gcloud ml speech recognize stream.part3.flac --language-code='en-US'
youre right and im getting this:
[asterisk@pbx sbin]$ RESULT= gcloud ml speech recognize stream.part3.flac --language-code='en-US'
ERROR: (gcloud.ml.speech.recognize) Invalid audio source [stream.part3.flac]. The source must either be a local path or a Google Cloud Storage URL (such as gs://bucket/object).
[asterisk@pbx sbin]$
//comment this line at the bottom to keep the TMP directory for analysis after the script runs
rm -Rf $TMPDIR
run the script again, then cd in to the temp directory and re-run
RESULT=gcloud ml speech recognize stream.part3.flac --language-code='en-US'
echo $RESULT
same result
[asterisk@pbx tmp]$ cd tmp.t6QQWwfhbN/
[asterisk@pbx tmp.t6QQWwfhbN]$ ls
stream.new
[asterisk@pbx tmp.t6QQWwfhbN]$ RESULT= gcloud ml speech recognize stream.part3.flac --language-code='en-US'
ERROR: (gcloud.ml.speech.recognize) Invalid audio source [stream.part3.flac]. The source must either be a local path or a Google Cloud Storage URL (such as gs://bucket/object).
[asterisk@pbx tmp.t6QQWwfhbN]$ echo $RESULT
[asterisk@pbx tmp.t6QQWwfhbN]$
on stream.new im able to see the "normal message" this is the one that im currently sending.
or do i need to disabled this on the pbx configuration?
i have this config: Mail Command : /usr/sbin/sendmail-gcloud
Alejandro Cardenas,
Hay un nuevo correo de voz en el buzón ext:
De: "Gustavo Martinez" <ext>
Duración del mensaje: 0:19 seconds
Fecha: Wednesday, September 23, 2020 at 05:24:35 PM
Marca *98 para acceder a su correo de voz por teléfono.
Ingresa a url para revisar su correo de voz con un navegador web.
You may wish to consider using the phone_call model. This will improve the transcription.
You may wish to consider using the phone_call model. This will improve the transcription.
Could you please be more specific? Example what to change? Thanks
You may wish to consider using the phone_call model. This will improve the transcription.
Could you please be more specific? Example what to change? Thanks
Looks like skippy1976 is referring to the speech model options available in Speech-to-Text. But I don't see an option to set a model using gcloud from the terminal.
Here's model documentation
Here's the gcloud documentation
Looks like there's a 60 second limit for the transcriptions using "gcloud ml speech recognize". But there would be no limit to the length using "gcloud ml speech recognize-long-running". I know the length of the message is stored somewhere as that ends up in the body of the email. Anyone have any ideas on how to modify this to use an "if then" option for longer voicemails?
I found a couple options that I like while digging into the documentation. The options I like are on the alpha channel, so there's a good chance they won't work long-term, but I'm okay with that on my setup. Here's what I changed:
ORIGINAL: RESULT=`gcloud ml speech recognize stream.part3.flac --language-code='en-US'`
NEW: RESULT=`gcloud alpha ml speech recognize stream.part3.flac --language-code='en-US' --interaction-type='voicemail' --include-word-time-offsets --filter-profanity --enable-automatic-punctuation`
Hey guys, new to this topic... Trying to get this to work on my Asterisk box... follow all the steps as indicated. Didn't get any errors along the way, but I don't seem to get any results... The voicemail still answers, records the file... and I still get the audio file to my email.. but at the bottom I see
--Google transcription result --
(Google was unable to recognize any speech in audio data.)
Also noticed that I can't play the MP3 file attached with the email.... says it's unsupported or corrupt.
Did some more testing... when I leave a voicemail... and I go into the /tmp/tmp.xxxxxxx folder... I can run the command manually
RESULT= `gcloud ml speech recognize stream.part3.flac --language-code='en-US'`
and with echo $RESULT I get the transcription like so...
{ "results": [ { "alternatives": [ { "confidence": 0.7456564, "transcript": "This is yet another test of the voicemail system Richard testing Richard testing." } ] } ] }
But still unable to get it in the email from Asterisk....
In the error file in the /tmp/tmp.xxxxxx I see this --
ERROR: (gcloud.ml.speech.recognize) Your current active account [xxxxxxxxxxx@gmail.com] does not have any valid credentials
Please run:
$ gcloud auth login
to obtain new credentials.
For service account, please activate it first:
$ gcloud auth activate-service-account ACCOUNT
Which is weird because the command runs manually....
Thanks!
Richard
Ok, well it turns out that this line --- export CLOUDSDK_CONFIG=/home/asterisk/.config/gcloud
was a problem for my setup... Now I get the transcription....
But!!!!! the audio file is still a problem. the mp3 file doesn't work... can't listen to it..
@tony722 Any ideas ??? Anyone ???
@kevinrossen have the MP3 attachments been working for you ?
This script fails for any voicemail longer than 1 minute, with the following error:
ERROR: (gcloud.ml.speech.recognize) INVALID_ARGUMENT: Sync input too long. For audio longer than 1 min use LongRunningRecognize with a 'uri' parameter.
I have fixed it by replacing
sox -G stream.part3.wav --channels=1 --bits=16 --rate=8000 stream.part3.flac
with
sox -G stream.part3.wav --channels=1 --bits=16 --rate=8000 stream.part3.flac trim 0 59
this does not "fix" the issue of too long voicemails, but it changes it so it only transcribes the first 59 seconds, which in my case is good enough.
I found a couple options that I like while digging into the documentation. The options I like are on the alpha channel, so there's a good chance they won't work long-term, but I'm okay with that on my setup. Here's what I changed:
ORIGINAL:
RESULT=`gcloud ml speech recognize stream.part3.flac --language-code='en-US'`
NEW:
RESULT=`gcloud alpha ml speech recognize stream.part3.flac --language-code='en-US' --interaction-type='voicemail' --include-word-time-offsets --filter-profanity --enable-automatic-punctuation`
@kevinrossen how are those alpha options working for you? I checked and it looks like they are still in Alpha status, which is a bummer.
was a problem for my setup... Now I get the transcription....
@CadillacRick did you run it as asterisk or root?
Thanks @tony722 @chrisduncansn I preferred to modify the APIs used to support multiple languages and punctuation. Now I no longer have to worry about the caller's language (at least up to three additional languages).
I have noticed that if the sentence starts with a word in a different language from the rest of the message there can be problems. It might be interesting to use different APIs for multiple languages in succession.
https://gist.github.com/rr10/472f88b41d7383ba6e04f982c0f8a7c2
RESULT=gcloud alpha ml speech recognize-long-running stream.part3.flac --language-code='ro-RO' --additional-language-codes='it-IT','en-US' --enable-automatic-punctuation --interaction-type=voicemail
I'm a noob with this gcloud, for me the fix was to su -asterisk gcloud auth login and then do the same for set the project id, I'm sure there was a more clever way it didn't put the json key in the asterisk directory until I did it that way and when i copied it permissions were a mess (for me anyway)
Under no circumstances should anybody ever chmod 777 /usr/bin/dos2unix. The dos2unix command as installed by the operating system is correctly set to 755, and is thus already executable by all users.
Adding write bits for "everyone" to this program means that if an attacker gains even the lowest-privilege account on the machine, they can completely replace the contents of dos2unix. If at any point in the future that program is run by 'root' to perform any action at all, the attacker now owns your entire machine. I've used this technique myself several times when doing embedded security assessments.
I just had to rebuild our PBX and re-did this install - this was the one part I wondered about. I just chmod 744'd it back to as it was. Thank you for coming here and saying this.
Thanks for the awesome script, I did have to troubleshoot it a little. It appears this now needs to be done with a service account.
Thanks for the awesome script, I did have to troubleshoot it a little. It appears this now needs to be done with a service account.
Can you please specify the need of a "service account"?
Thanks
Thanks for the awesome script, I did have to troubleshoot it a little. It appears this now needs to be done with a service account.
Can you please specify the need of a "service account"? Thanks
When using 'gcloud auth login' and using my credentials it would fail because the speech to text wants you to use a service account. Which can be done with the following.
gcloud auth activate-service-account "ServiceAccountName"@cobalt-bliss-383201.iam.gserviceaccount.com --key-file=./"KeyFileName".json
See creating a service account in Google Cloud.
https://cloud.google.com/iam/docs/keys-create-delete#creating
I have updated my comment. Github screwed with the ticks in the code, so yeah, if you just blindly copied and pasted
RESULT= gcloud ml speech recognize stream.part3.flac --language-code='en-US' 1>result 2>error
you were gonna break the script. (I'll also add, this isn't even the line I suggested you add - you're setting the RESULT variable equal to the line that logs the output to two different files. Not even sure what bash is going to do there, but I'll bet it's not gonna work.)
Did you read that full comment? The concept is using it for debugging because the gcloud command doesn't execute properly when it's executed by Asterisk as part of the script. You can't see the error, but if you add an additional gcloud command and log the error text to a file, then keep the script from deleting the TMP file, you can look at the error. Root cause was that it was trying to access the gcloud config inside the root home folder, not asterisk's home.
The fix was simply to add
export CLOUDSDK_CONFIG=/home/asterisk/.config/gcloud
anywhere above the gcloud command.