Created
June 12, 2019 15:39
-
-
Save DaveIW2034/a097973e3654b51ac3ffd8993638c45b to your computer and use it in GitHub Desktop.
gridtext 文件读入的中文
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Documentation
# https://blog.csdn.net/duxin_csdn/article/details/88966295
# Library source
# https://github.com/kylebgorman/textgrid
# Parsing code
import textgrid | |
def is_chinese(uchar):
    """Return whether ``uchar`` is a single character in U+4E00..U+9FA5.

    Args:
        uchar: The text to test; expected to be a one-character string.

    Returns:
        True when ``uchar`` is exactly one character whose code point lies in
        the inclusive range U+4E00..U+9FA5, otherwise False. Empty and
        multi-character strings return False rather than being compared
        lexicographically against the range bounds.
    """
    # Characters outside this range (including any ideographs assigned to
    # other code points) are intentionally not matched.
    return len(uchar) == 1 and u'\u4e00' <= uchar <= u'\u9fa5'
# Read the TextGrid file and print the label of every interval in every
# item the TextGrid yields.
tg = textgrid.TextGrid()
tg.read('./REC0001.textgrid')
for item in tg:
    # print(item.name, item.intervals, len(item.intervals))
    for i in item.intervals:
        # Print the label, its type, and the type of its UTF-8 encoded form.
        print(i.mark, type(i.mark), type(i.mark.encode('utf-8')))
# Expected format of the input file
"""
File type = "ooTextFile"
Object class = "TextGrid"
xmin = 0
xmax = 144.43
tiers? <exists>
size = 3
item []:
    item [1]:
        class = "IntervalTier"
        name = "GLOBAL"
        xmin = 0
        xmax = 144.43
        intervals: size = 1
        intervals [1]:
            xmin = 0
            xmax = 144.43
            text = "[speaker]: 1 male, 2 male, 3 male; [language]: 1 普通话, 2 普通话, 3 普通话"
    item [2]:
        class = "IntervalTier"
        name = "SPEAKER"
        xmin = 0
        xmax = 144.43
        intervals: size = 2
        intervals [1]:
            xmin = 0
            xmax = 1.1184741690417594
            text = ""
        intervals [2]:
            xmin = 1.1184741690417594
            xmax = 1.7980652315593901
            text = "3"
    item [3]:
        class = "IntervalTier"
        name = "CONTENT"
        xmin = 0
        xmax = 144.43
        intervals: size = 4
        intervals [1]:
            xmin = 0
            xmax = 1.1184741690417594
            text = "[ENS]"
        intervals [2]:
            xmin = 1.1184741690417594
            xmax = 1.7980652315593901
            text = "[UNK]你好"
        intervals [3]:
            xmin = 1.7980652315593901
            xmax = 2.7925887376827525
            text = "[ENS]"
        intervals [4]:
            xmin = 2.7925887376827525
            xmax = 3.604782934350176
            text = "[*]"
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment