Skip to content

Instantly share code, notes, and snippets.

@JustinGrote
Last active May 2, 2024 00:50
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JustinGrote/c51b106d7f0558c088d5a2dc055dbc2c to your computer and use it in GitHub Desktop.
Podbean Podcast Transcription to Storage Account
#requires -module Az.CognitiveServices
using namespace Microsoft.Azure.Commands.Management.CognitiveServices.Models
function Get-PodBeanPodcast {
    <#
    .SYNOPSIS
        Fetches and parses the RSS feed of a Podbean-hosted podcast.
    #>
    param(
        # The Podbean feed name (the path portion of feed.podbean.com/<Name>/feed.xml)
        $Name = 'powershellpodcast'
    )
    $feedUri = "https://feed.podbean.com/$Name/feed.xml"
    Invoke-RestMethod $feedUri
}
#region Base
function Connect-AzSpeech {
    <#
    .SYNOPSIS
        Builds a script-scoped speech-to-text connection context (base URI + account key)
        from an Azure Cognitive Services account, for use by the other *-AzSpeech* commands.
    .NOTES
        Emits nothing; the context is stored in $script:__AzSpeechContext and later
        consumed by Get-AzSpeechContext.
    #>
    [CmdletBinding()]
    param(
        # The Cognitive Services (Speech) account to connect to
        [Parameter(Mandatory, ValueFromPipeline)]
        [Microsoft.Azure.Commands.Management.CognitiveServices.Models.PSCognitiveServicesAccount]
        $CognitiveServicesAccount
    )
    $location = $CognitiveServicesAccount.Location
    $apiVersion = '3.2-preview.1'
    $baseUri = "https://$location.api.cognitive.microsoft.com/speechtotext/v$apiVersion/"
    $key = $CognitiveServicesAccount | Get-AzCognitiveServicesAccountKey | Select-Object -Expand Key1
    # BUG FIX: '(throw ...)' is not a valid expression — throw is a statement and must be
    # wrapped in a $() subexpression (as already done in Get-AzSpeechContext).
    $key ??= $(throw 'Cannot retrieve key from cognitive service')
    $script:__AzSpeechContext = @{
        baseUri = $baseUri
        key     = $key
    }
}
# Holds everything needed to call the Speech REST API: pre-built Invoke-RestMethod
# parameters (auth header + content type) and the service base URI.
class AzSpeechContext {
# Splattable Invoke-RestMethod parameters (Ocp-Apim-Subscription-Key header, ContentType)
[hashtable] $IrmParams
# Root URI of the speech-to-text API for the account's region/API version
[string] $BaseUri
}
function Get-AzSpeechContext {
    <#
    .SYNOPSIS
        Materializes the script-scoped connection info created by Connect-AzSpeech
        into an [AzSpeechContext] instance.
    #>
    # Guard clause: Connect-AzSpeech must have populated the script-scoped state first
    $connection = $script:__AzSpeechContext ??= $(throw 'You must run Connect-AzSpeech first')
    $authHeaders = @{
        'Ocp-Apim-Subscription-Key' = $connection.key
    }
    [AzSpeechContext]@{
        IrmParams = @{
            Headers     = $authHeaders
            ContentType = 'application/json'
        }
        BaseUri   = $connection.baseUri
    }
}
function Invoke-AzSpeech {
    <#
    .SYNOPSIS
        Low-level REST wrapper for the Speech API: resolves the target URI, applies the
        auth headers from the context, and JSON-serializes an optional body.
    #>
    [CmdletBinding()]
    param(
        #Relative endpoint appended to the context BaseUri (e.g. 'transcriptions'); ignored when -Uri is given
        $Endpoint,
        #Absolute URI to call (takes precedence over -Endpoint)
        $Uri,
        #Object to serialize to JSON as the request body
        $Body,
        #HTTP verb; defaults to POST when a body is supplied, otherwise GET
        [string]$Method = $Body ? 'POST' : 'GET',
        #Connection context; defaults to the one built by Connect-AzSpeech
        [AzSpeechContext]$Context
    )
    $Context ??= Get-AzSpeechContext
    $Uri ??= $Context.BaseUri + $Endpoint
    # Clone so we never mutate a caller-supplied context's shared hashtable
    $irmParams = $Context.IrmParams.Clone()
    $irmParams.Verbose = $false #Response message is not helpful
    if ($null -ne $Body) {
        # BUG FIX: the original always passed -Body, so GET requests sent a serialized
        # 'null' body, which Invoke-RestMethod appends to the GET query string.
        $irmParams.Body = ConvertTo-Json -Depth 10 $Body
    }
    Write-Debug "Invoking Speech $Method $Uri"
    Invoke-RestMethod @irmParams -Method $Method -Uri $Uri
}
#endRegion Base
# Request body shape for POST /transcriptions (batch transcription create).
# Exactly one of contentUrls / contentContainerUrl should be populated.
class TranscriptCreateRequest {
# Direct URLs of individual audio files to transcribe
[string[]] $contentUrls
# SAS URL of a storage container whose blobs should all be transcribed
[string] $contentContainerUrl
# Human-readable job name
[string] $displayName
# Audio channel indices to transcribe (top-level mirror of properties.channels)
[int[]] $channels
# Transcription language/locale
[string] $locale = 'en-US'
# Service options; defaults: no speaker diarization, mono channel 0,
# diarization speaker bounds 1-5 (used only when diarizationEnabled is set)
[hashtable] $properties = @{
diarizationEnabled = $false
channels = @(0)
diarization = @{
speakers = @{
minCount = 1
maxCount = 5
}
}
}
}
function Start-AzSpeechTranscription {
    <#
    .SYNOPSIS
        Submits a new batch transcription job for either a set of audio URLs or an
        entire storage container, returning the service's job response.
    #>
    [CmdletBinding()]
    param(
        #The content urls to transcribe
        [Parameter(Mandatory, ParameterSetName = 'ByUrl')]
        [string[]]$contentUrl,
        [Parameter(Mandatory, ParameterSetName = 'ByDestinationContainer')]
        [string]$contentContainerUrl,
        [string]$DisplayName = [Guid]::NewGuid(),
        #Specify 0,1 to analyze both stereo channels
        [int[]]$Channels = 0,
        #Identify Speakers if specified
        [Switch]$Diarization
    )
    $request = [TranscriptCreateRequest]::new()
    # The parameter set tells us which content source was bound
    if ($PSCmdlet.ParameterSetName -eq 'ByDestinationContainer') {
        $request.contentContainerUrl = $contentContainerUrl
    } else {
        $request.contentUrls = $contentUrl
    }
    $request.displayName = $DisplayName
    $request.properties.channels = $Channels
    $request.properties.diarizationEnabled = $Diarization.IsPresent
    Invoke-AzSpeech -Endpoint 'transcriptions' -Body $request
}
filter Get-AzSpeechTranscription {
    <#
    .SYNOPSIS
        Fetches the current state of a transcription job, or lists all jobs on the
        account when no input is provided.
    #>
    [CmdletBinding()]
    param(
        #A transcription response object (must expose a .self link); omit to list everything
        [Parameter(ValueFromPipeline)]$Response
    )
    # No input: enumerate every transcription on the account
    if ($null -eq $Response) {
        return (Invoke-AzSpeech 'transcriptions').values
    }
    # Guard clause: piped objects must carry their own resource link
    if (-not $Response.self) { throw 'Response must be a transcription response' }
    Invoke-AzSpeech -Uri $Response.Self
}
filter Wait-AzSpeechTranscription {
    <#
    .SYNOPSIS
        Polls a transcription job until it reaches a terminal state (Succeeded or Failed)
        and returns the final job response.
    #>
    [CmdletBinding()]
    param(
        #The transcription response (e.g. from Start-AzSpeechTranscription) to wait on
        [Parameter(Mandatory, ValueFromPipeline)]$Response,
        #How often to check for updates, in seconds
        $CheckInterval = 1
    )
    if (-not $Response.self) {
        throw 'Response must be a transcription response'
    }
    $Response = Invoke-AzSpeech -Uri $Response.Self
    while ($Response.status -notmatch 'Succeeded|Failed') {
        Write-Verbose "Transcription Job $($Response.displayName) is currently $($Response.status)"
        # BUG FIX: sleep *before* re-polling — the original slept after each poll,
        # adding a wasted interval even after the job had already completed.
        Start-Sleep -Seconds $CheckInterval
        $Response = Invoke-AzSpeech -Uri $Response.Self
    }
    return $Response
}
filter Get-AzSpeechTranscriptionFile {
    <#
    .SYNOPSIS
        Enumerates the result files attached to a transcription job.
    #>
    [CmdletBinding()]
    param(
        #The transcription response whose files should be listed
        [Parameter(Mandatory, ValueFromPipeline)]$Response
    )
    if (-not $Response.self) { throw 'Response must be a transcription response' }
    $filesUri = '{0}/files' -f $Response.Self
    (Invoke-AzSpeech -Uri $filesUri).values
}
filter Get-AzSpeechTranscriptionFileContent {
    <#
    .SYNOPSIS
        Downloads the content of each transcription result file in the piped response.
    #>
    [CmdletBinding()]
    param(
        #A file listing (from Get-AzSpeechTranscriptionFile) exposing .links.contentUrl
        [Parameter(Mandatory, ValueFromPipeline)]$Response
    )
    if (-not $Response.links.contentUrl) {
        throw 'Response must be a transcription file'
    }
    foreach ($url in $Response.links.contentUrl) {
        # BUG FIX: the loop body previously requested $Response.links.contentUrl (the
        # whole collection) on every iteration instead of the current $url.
        Invoke-AzSpeech -Uri $url
    }
}
filter Get-AzSpeechTranscript {
    <#
    .SYNOPSIS
        Fetches the transcript JSON content (files of kind 'transcription') for a
        completed transcription job.
    #>
    [CmdletBinding()]
    param(
        #The transcription whose transcript content should be retrieved
        [Parameter(Mandatory, ValueFromPipeline)]$Response
    )
    if (-not $Response.self) { throw 'Response must be a transcription' }
    # Only the 'transcription' kind files contain the recognized-phrase JSON
    $transcriptFiles = $Response
    | Get-AzSpeechTranscriptionFile
    | Where-Object kind -EQ 'transcription'
    $transcriptFiles | Get-AzSpeechTranscriptionFileContent
}
filter Get-AzSpeechRecognizedPhrase {
    <#
    .SYNOPSIS
        Filters a transcript's recognized phrases by a regex over the lexical text,
        attaching the parent transcript to each match for later reference.
    #>
    [CmdletBinding()]
    param(
        #A regex filter for the phrase you are looking for
        [string]$Filter,
        #The transcription result de-jsonified from the transcript.json file
        [Parameter(Mandatory, ValueFromPipeline)]$TranscriptFileContent
    )
    # BUG FIX (precedence): the original '-not $x.count -gt 0' parsed as
    # '(-not $x.count) -gt 0' and only worked by accidental boolean coercion.
    if (-not ($TranscriptFileContent.recognizedPhrases.count -gt 0)) {
        throw 'Response must be a transcription result'
    }
    $TranscriptFileContent.recognizedPhrases
    | Where-Object { $_.nBest.lexical -match $Filter }
    | Add-Member -NotePropertyName 'Transcript' -NotePropertyValue $TranscriptFileContent -PassThru -Force #Makes it easier to get the referenced data
}
# Play an audio mp3 file from a particular duration to a particular end point
filter Start-TranscriptClip {
    <#
    .SYNOPSIS
        Plays the audio of a recognized phrase in VLC, with optional context before
        and after the phrase.
    #>
    [CmdletBinding()]
    param(
        #The phrase snippet to play (needs the Transcript attached by Get-AzSpeechRecognizedPhrase)
        [Parameter(Mandatory, ValueFromPipeline)]$RecognizedPhrase,
        #How much Before context to play (seconds)
        [int]$BeforeSeconds = 0,
        #How much after context to play (seconds)
        [int]$AfterSeconds = 0
    )
    if (-not $RecognizedPhrase.Transcript -or -not $RecognizedPhrase.offset -or -not $RecognizedPhrase.duration) {
        throw 'Response must be a recognized phrase with a transcript attached'
    }
    # Strip any query string (e.g. SAS token) from the source URL
    $source = $RecognizedPhrase.Transcript.source.split('?')[0]
    # offset/duration are ISO-8601 durations (e.g. PT1M30S) — convert via XmlConvert
    [float]$phraseStart = [Xml.XmlConvert]::ToTimeSpan($RecognizedPhrase.offset).TotalSeconds
    [float]$phraseDuration = [Xml.XmlConvert]::ToTimeSpan($RecognizedPhrase.duration).TotalSeconds
    # BUG FIX: clamp the start so leading context can't go negative, and extend the
    # run time by the context actually added before the phrase — the original only
    # added AfterSeconds, so BeforeSeconds > 0 cut off the end of the phrase.
    [float]$start = [Math]::Max(0, $phraseStart - $BeforeSeconds)
    [float]$duration = $phraseDuration + ($phraseStart - $start) + $AfterSeconds
    Write-Verbose "Playing $source from $start for $duration seconds"
    Write-Verbose "Transcript Contents: $($RecognizedPhrase.nBest.display)"
    & vlc $source --play-and-exit --start-time $start --run-time $duration
}
filter Get-PSPodcastMentionLeaderBoard([Parameter(ValueFromPipeline)]$Name, [Object[]]$Transcripts) {
    # Count how many recognized phrases across all transcripts mention this name
    $mentionCount = ($Transcripts | Get-AzSpeechRecognizedPhrase -Filter $Name).count
    [PSCustomObject]@{
        Name     = $Name
        Mentions = $mentionCount
    }
}
# #Main
function Get-PSPodcastEgoBoost ($Name, $Transcript) {
    # Fetch and download every transcript unless a set was supplied by the caller
    if ($null -eq $Transcript) {
        $Transcript = Get-AzSpeechTranscription | Get-AzSpeechTranscript
    }
    # Pick one random mention of $Name and play the clip
    $mention = $Transcript
    | Get-AzSpeechRecognizedPhrase -Filter $Name
    | Get-Random -Count 1
    $mention | Start-TranscriptClip
}
#Get Podcasts
#Get-PodBeanPodcast | sort episode -descending |% -Throttle 5 -Parallel {iwr $_.enclosure.url -outfile "$HOME/downloads/podcasts/psp-s$($_.season ?? 0)e$($_.episode).mp3"}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment