# nathangathright/deepgram.py

## deepgram.py
# convert deepgram json to PodcastNamespace json
# Usage: python deepgram.py <input.json>

import json
import os
import sys

def build_transcript(data):
    """Convert a parsed Deepgram response into a PodcastNamespace transcript.

    Every word of every utterance becomes its own segment, in input order.

    Args:
        data: Parsed Deepgram JSON. Must contain ``results.utterances``, a
            list whose items each carry a ``words`` list; every word needs
            ``start``, ``end`` and ``punctuated_word``.

    Returns:
        A dict with ``version`` (``'1.0.0'``) and ``segments``, a list of
        dicts holding ``startTime``, ``endTime`` and ``body``. The time
        values are copied through unchanged from the input.

    Raises:
        KeyError: If one of the expected keys is missing.
    """
    # NOTE: the per-word ``speaker`` value is deliberately not emitted; the
    # original script had that field disabled.
    segments = [
        {
            'startTime': word['start'],
            'endTime': word['end'],
            'body': word['punctuated_word'],
        }
        for utterance in data['results']['utterances']
        for word in utterance['words']
    ]
    return {'version': '1.0.0', 'segments': segments}


def namespace_output_path(input_path):
    """Return the output path: the input path with its extension swapped
    for ``-namespace.json``.

    Only the final extension is stripped, so dots in directory names
    (``./input.json``) or earlier in the file name (``my.episode.json``)
    are preserved.
    """
    root, _ext = os.path.splitext(input_path)
    return root + '-namespace.json'


def main(argv=None):
    """Command-line entry point: convert ``<input.json>`` and save the result.

    Args:
        argv: Argument list without the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument is used.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Exit with the usage hint instead of an IndexError traceback.
        sys.exit('Usage: python deepgram.py <input.json>')

    input_path = args[0]

    # load json
    with open(input_path, encoding='utf-8') as f:
        data = json.load(f)

    transcript = build_transcript(data)

    # save the transcript object to a json file next to the input
    with open(namespace_output_path(input_path), 'w', encoding='utf-8') as f:
        json.dump(transcript, f, indent=2)


if __name__ == '__main__':
    main()