# dauuricus/json_to_srt.py

## json_to_srt.py
import contextlib
import copy
import datetime
import json
import sys

from google.colab import files
#sys.setrecursionlimit(30000)

# Collect the user's upload(s) through google.colab's files helper.
uploaded = files.upload()

# Report every uploaded file; the name reported last becomes the input file.
for fn, payload in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(payload)))
    upfilename = fn

def fmttime(seconds):
    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to microseconds by ``datetime.timedelta`` and then
    truncated (not rounded) to milliseconds. Hours are not wrapped at 24, so
    offsets of a day or more keep counting upwards (``25:00:00,000``).

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The timestamp string, with the hour field at least two digits wide.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    delta = datetime.timedelta(seconds=seconds)
    if delta < datetime.timedelta(0):
        raise ValueError('seconds must be non-negative, got {!r}'.format(seconds))
    # Floor division by one millisecond yields a plain int and truncates the
    # sub-millisecond remainder.
    total_millis = delta // datetime.timedelta(milliseconds=1)
    total_secs, millis = divmod(total_millis, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, mins, secs, millis)

# Still referenced by the script's final `sys.stdout = original_stdout`.
original_stdout = sys.stdout
filename = 'subtitle.srt'  # the captions are written to this file
with open(upfilename, 'r', encoding='utf8') as up_f:
    line = up_f.read()
    jso = json.loads(line)
    # Every print() in this block lands in the subtitle file. redirect_stdout
    # puts the previous stdout back when the block exits, even after an error,
    # so a failure no longer leaves sys.stdout pointing at a closed file.
    with open(filename,'w',encoding='utf8') as down_f, \
            contextlib.redirect_stdout(down_f):

        totaltime = 0       # summed word durations of the pending caption
        sentence = []       # words of the pending caption

        endtime = ''
        starttime = ''
        lastword_time = 0   # caption start + summed durations so far
        lineNum = 1         # running caption number


        def list_copy(n):
            """Print the words of the first ``n`` transcripts via ``print_word``.

            ``print_word`` pops entries off the lists it is given, so it is
            handed deep copies and the parsed JSON in ``jso`` stays intact.
            """
            snapshots = [
                copy.deepcopy(jso['transcripts'][idx]['words'])
                for idx in range(n)
            ]
            print_word(snapshots, 0)

        #""" check confidence
        def print_word(copy,n) :
            """Print the first value of every entry, visiting the lists round-robin.

            Starting at list ``n``, one entry is popped from the current list
            and printed as ``confidence: <list index>: <first value>``, then
            the next list takes its turn. Exhausted lists are skipped, and
            the function returns once every list is empty.

            Implemented as a loop: the earlier recursive form used one stack
            frame per printed entry and hit the recursion limit on long
            transcripts.

            Args:
                copy: List of lists of dicts. The inner lists are consumed
                    (left empty). The name shadows the ``copy`` module inside
                    this function only.
                n: Index of the list to start with.
            """
            total = len(copy)
            if total == 0:
                return
            while True:
                if not copy[n]:
                    # Current list is exhausted: look once around the ring for
                    # a list that still has entries.
                    for _ in range(total):
                        n += 1
                        if n > total - 1:
                            n = 0
                        if copy[n]:
                            break
                    else:
                        return  # every list is empty
                dic = copy[n].pop(0)
                values = list(dic.values())
                print("confidence:",str(n)+':', values[0])
                n += 1
                if n > total - 1:
                    n = 0

        #list_copy(3)

        #confidence:0
        # Build the captions from the first transcript only (index 0).
        # Every print() below is expected to go to the subtitle file, because
        # sys.stdout was pointed at it before this loop.
        confidence =jso['transcripts'][0]
        for i,ob in enumerate(confidence['words']):
            # NOTE(review): talk_start / talk_end are assigned in several
            # places but never read anywhere in the visible code.
            talk_start = True
            talk_end = False

            # The fields are handled in the dict's own iteration order. The
            # bookkeeping only lines up when 'word' is seen before
            # 'start_time' and 'start_time' before 'duration' -- presumably
            # the key order of the input JSON; verify against its producer.
            for key in ob:
                if key == 'word':
                    # Empty words are skipped; anything else joins the
                    # pending caption.
                    if ob[key] != '':
                        sentence.append(ob.get(key))

                elif key == 'start_time':
                    time = ob[key]
                    # lastword_time is the pending caption's start plus the
                    # summed word durations (see the 'duration' branch), so
                    # this difference is the gap measured from that estimate.
                    if  time - lastword_time < 1:
                        talk_start = False
                        talk_end = False

                    elif time - lastword_time >= 1: # 1 second of silence
                        talk_start = False
                        talk_end = True
                        ### block > close the pending caption after a pause
                        totaltime = 0
                        endtime = fmttime(lastword_time)
                        if len(sentence) > 1:
                            # The word appended just above already belongs to
                            # the next caption: hold it back while flushing.
                            temp = sentence.pop()
                            print(lineNum)
                            lineNum += 1
                            # endtime2 is only ever set together with
                            # lastword_time, so it holds the same text as
                            # endtime computed above.
                            # NOTE(review): NameError if no 'duration' has
                            # been processed before this point.
                            print(starttime,'-->',endtime2)
                            # kotoba (Japanese: "words") is the caption text
                            kotoba = ''
                            for word in sentence:
                                kotoba += word + ' '
                            print(kotoba.rstrip())
                            print()
                            sentence.clear()
                            sentence.append(temp) # new caption
                        ### <  block

                    # Exactly one pending word means a caption starts here:
                    # remember its start both formatted and as a number.
                    if len(sentence) == 1 :
                        talk_start = True
                        talk_end = False
                        starttime = fmttime(time)
                        p_time = time

                elif key == 'duration':
                    # Only durations are accumulated; gaps between words
                    # shorter than the silence threshold are not added.
                    # NOTE(review): p_time is first assigned in the
                    # 'start_time' branch and only when exactly one word is
                    # pending -- NameError here if that never happened
                    # (e.g. the very first word is empty).
                    totaltime += ob[key]
                    lastword_time = p_time + totaltime

                    endtime2 = fmttime(lastword_time)

                    if totaltime >= 4: # 4 seconds of speech go into one caption
                        ### block > flush the whole pending caption
                        totaltime = 0
                        endtime = fmttime(lastword_time)
                        print(lineNum)
                        lineNum += 1
                        print(starttime,'-->',endtime)
                        kotoba = ''
                        for word in sentence:
                            kotoba += word + ' '

                        print(kotoba.rstrip())
                        print()
                        sentence.clear()
                        ### < block
                    elif totaltime < 4 and i + 1 == len(confidence['words']):
                        ### block > last word of the transcript: flush what is left
                        totaltime = 0
                        endtime = fmttime(lastword_time)
                        print(lineNum)
                        lineNum += 1
                        print(starttime,'-->',endtime)
                        kotoba = ''
                        for word in sentence:
                            kotoba += word + ' '

                        print(kotoba.rstrip())
                        print()
                        sentence.clear()
                        ### < block
        # Point print() back at the stdout object saved before the redirect.
        # This line itself is skipped if anything above raises.
        sys.stdout = original_stdout # stdout back

# Runs after both files have been closed by the `with` blocks above.
files.download(filename) # download .srt file
	import json
	import datetime
	from google.colab import files
	import copy
	import sys
	#sys.setrecursionlimit(30000)

	# Collect the user's upload(s) through google.colab's files helper.
	uploaded = files.upload()

	# Report every uploaded file; the name reported last becomes the input
	# file. (The loop body had lost its indentation in this copy.)
	for fn, payload in uploaded.items():
	    print('User uploaded file "{name}" with length {length} bytes'.format(name=fn, length=len(payload)))
	    upfilename = fn

	def fmttime(seconds):
	    """Format an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

	    The value is rounded to microseconds by ``datetime.timedelta`` and
	    then truncated (not rounded) to milliseconds. Hours are not wrapped
	    at 24, so offsets of a day or more keep counting upwards.

	    Args:
	        seconds: Non-negative offset in seconds (int or float).

	    Returns:
	        The timestamp string, with the hour field at least two digits wide.

	    Raises:
	        ValueError: If ``seconds`` is negative.
	    """
	    delta = datetime.timedelta(seconds=seconds)
	    if delta < datetime.timedelta(0):
	        raise ValueError('seconds must be non-negative, got {!r}'.format(seconds))
	    # Floor division by one millisecond yields a plain int and truncates
	    # the sub-millisecond remainder.
	    total_millis = delta // datetime.timedelta(milliseconds=1)
	    total_secs, millis = divmod(total_millis, 1000)
	    total_mins, secs = divmod(total_secs, 60)
	    hours, mins = divmod(total_mins, 60)
	    return '{:02d}:{:02d}:{:02d},{:03d}'.format(hours, mins, secs, millis)

	filename = 'subtitle.srt'  # the captions are written to this file
	with open(upfilename, 'r', encoding='utf8') as up_f:
	    jso = json.loads(up_f.read())
	    # Every print() in this block lands in the subtitle file.
	    # redirect_stdout puts the previous stdout back when the block exits,
	    # even after an error.
	    with open(filename, 'w', encoding='utf8') as down_f, \
	            contextlib.redirect_stdout(down_f):

	        totaltime = 0       # summed word durations of the pending caption
	        sentence = []       # words of the pending caption

	        endtime = ''
	        starttime = ''
	        lastword_time = 0   # caption start + summed durations so far
	        lineNum = 1         # running caption number

	        def list_copy(n):
	            """Print the words of the first ``n`` transcripts via ``print_word``.

	            ``print_word`` consumes its lists, so it gets deep copies and
	            the parsed JSON in ``jso`` stays intact.
	            """
	            temp = [
	                copy.deepcopy(jso['transcripts'][idx]['words'])
	                for idx in range(n)
	            ]
	            print_word(temp, 0)

	        def print_word(copy, n):
	            """Print the first value of every entry, visiting the lists round-robin.

	            One entry is popped from list ``n`` and printed as
	            ``confidence: <list index>: <first value>``, then the next
	            list takes its turn. Exhausted lists are skipped; returns once
	            all lists are empty. Written as a loop so long transcripts do
	            not exhaust the recursion limit.
	            """
	            total = len(copy)
	            if total == 0:
	                return
	            while True:
	                if not copy[n]:
	                    # Look once around the ring for a non-empty list.
	                    for _ in range(total):
	                        n += 1
	                        if n > total - 1:
	                            n = 0
	                        if copy[n]:
	                            break
	                    else:
	                        return  # every list is empty
	                dic = copy[n].pop(0)
	                values = list(dic.values())
	                print("confidence:", str(n) + ':', values[0])
	                n += 1
	                if n > total - 1:
	                    n = 0

	        #list_copy(3)

	        # Build the captions from the first transcript only (index 0).
	        confidence = jso['transcripts'][0]
	        words = confidence['words']
	        for i, ob in enumerate(words):
	            # Fields are handled in the dict's own order; the bookkeeping
	            # expects 'word', then 'start_time', then 'duration'.
	            for key in ob:
	                if key == 'word':
	                    if ob[key] != '':
	                        sentence.append(ob[key])

	                elif key == 'start_time':
	                    time = ob[key]
	                    if time - lastword_time >= 1:  # 1 second of silence
	                        totaltime = 0
	                        endtime = fmttime(lastword_time)
	                        if len(sentence) > 1:
	                            # The newest word belongs to the next caption.
	                            temp = sentence.pop()
	                            print(lineNum)
	                            lineNum += 1
	                            print(starttime, '-->', endtime)
	                            print(' '.join(sentence).rstrip())
	                            print()
	                            sentence.clear()
	                            sentence.append(temp)  # new caption

	                    # Exactly one pending word: a caption starts here.
	                    if len(sentence) == 1:
	                        starttime = fmttime(time)
	                        p_time = time

	                elif key == 'duration':
	                    totaltime += ob[key]
	                    lastword_time = p_time + totaltime

	                    # Flush after 4 seconds of speech, or at the last word.
	                    if totaltime >= 4 or i + 1 == len(words):
	                        totaltime = 0
	                        endtime = fmttime(lastword_time)
	                        print(lineNum)
	                        lineNum += 1
	                        print(starttime, '-->', endtime)
	                        print(' '.join(sentence).rstrip())
	                        print()
	                        sentence.clear()

	files.download(filename)  # download .srt file