Skip to content

Instantly share code, notes, and snippets.

@DaveIW2034
Created June 12, 2019 15:39
Show Gist options
  • Save DaveIW2034/30df387744e53eef0e9df72039c0dc25 to your computer and use it in GitHub Desktop.
Save DaveIW2034/30df387744e53eef0e9df72039c0dc25 to your computer and use it in GitHub Desktop.
gridtext 文件读入的中文
# 说明文档
# https://blog.csdn.net/duxin_csdn/article/details/88966295
# 库文件位置
# https://github.com/kylebgorman/textgrid
# 解析代码
import textgrid
def is_chinese(uchar):
    """Return whether ``uchar`` is a Chinese character.

    The check is a plain string comparison against the boundary
    characters U+4E00 and U+9FA5 (both inclusive), so it is meant to be
    called with a single-character string.

    Args:
        uchar: The character to test.

    Returns:
        bool: True if ``uchar`` falls within U+4E00..U+9FA5, else False.
    """
    # Chained comparison yields the bool directly; no if/else needed.
    return u'\u4e00' <= uchar <= u'\u9fa5'
# Load the TextGrid file and print the label of every interval in every tier.
tg = textgrid.TextGrid()
tg.read('./REC0001.textgrid')
for tier in tg:
    # Debug aid: print(tier.name, tier.intervals, len(tier.intervals))
    for interval in tier.intervals:
        label = interval.mark
        # Show the label, its type, and the type of its UTF-8 encoded form.
        print(label, type(label), type(label.encode('utf-8')))
# 文件导入应有格式
"""
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0
xmax = 144.43
tiers? <exists>
size = 3
item []:
item [1]:
class = "IntervalTier"
name = "GLOBAL"
xmin = 0
xmax = 144.43
intervals: size = 1
intervals [1]:
xmin = 0
xmax = 144.43
text = "[speaker]: 1 male, 2 male, 3 male; [language]: 1 普通话, 2 普通话, 3 普通话"
item [2]:
class = "IntervalTier"
name = "SPEAKER"
xmin = 0
xmax = 144.43
intervals: size = 2
intervals [1]:
xmin = 0
xmax = 1.1184741690417594
text = ""
intervals [2]:
xmin = 1.1184741690417594
xmax = 1.7980652315593901
text = "3"
item [3]:
class = "IntervalTier"
name = "CONTENT"
xmin = 0
xmax = 144.43
intervals: size = 4
intervals [1]:
xmin = 0
xmax = 1.1184741690417594
text = "[ENS]"
intervals [2]:
xmin = 1.1184741690417594
xmax = 1.7980652315593901
text = "[UNK]你好"
intervals [3]:
xmin = 1.7980652315593901
xmax = 2.7925887376827525
text = "[ENS]"
intervals [4]:
xmin = 2.7925887376827525
xmax = 3.604782934350176
text = "[*]"
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment