Skip to content

Instantly share code, notes, and snippets.

@qkdxorjs1002
Last active January 24, 2024 01:34
Show Gist options
  • Save qkdxorjs1002/1b2009770466ced9926772f3fd71fec5 to your computer and use it in GitHub Desktop.
Save qkdxorjs1002/1b2009770466ced9926772f3fd71fec5 to your computer and use it in GitHub Desktop.
음성 수집 데이터 Pair(WAV/JSON) 일괄 체크 스크립트
from collections import defaultdict
import os
import time
import re
from datetime import datetime
### Directory path
# On Windows, double every backslash.
# Example) = "C:\\NIA_COMPLETE_labeling\\"
sourcePath = "/Users/paragonnov/Documents/org/submit"

# Splits a file name into (stem, extension) at the last dot that is followed
# by at least one character. Compiled once because it is applied to every
# file; written as a raw string so "\." is not an invalid str escape.
FILE_NAME_PATTERN = re.compile(r"(.*)(\..\w*$)")


def find_unpaired(files):
    """Group file names by stem and report the stems that have no partner.

    Args:
        files: File names (not full paths) taken from a single directory.

    Returns:
        A ``(unpaired, unparsable)`` tuple of lists.  ``unpaired`` holds, in
        first-seen order, every stem that occurs fewer than two times.
        ``unparsable`` holds the names the pattern could not split (for
        example names without an extension).  Names whose stem is empty,
        such as ``.DS_Store``, are ignored.
    """
    stem_counts = defaultdict(int)
    unparsable = []
    for file_name in files:
        match = FILE_NAME_PATTERN.search(file_name)
        if match is None:
            unparsable.append(file_name)
            continue
        stem = match.group(1)
        if stem:
            stem_counts[stem] += 1
    unpaired = [stem for stem, count in stem_counts.items() if count < 2]
    return unpaired, unparsable


def check_tree(root, log):
    """Walk ``root`` and log every unparsable name and unpaired stem.

    Args:
        root: Directory to scan recursively with ``os.walk``.
        log: Writable text stream that receives the report lines.
    """
    for path, _dirs, files in os.walk(root):
        # Progress output: one line per visited directory.
        print(path)
        unpaired, unparsable = find_unpaired(files)
        for file_name in unparsable:
            log.write(
                path + "\n err: " + path + "/" + file_name
                + "\n No RegEx result" + "\n" + "\n"
            )
        for stem in unpaired:
            log.write(
                "\"" + stem + "\" in \"" + path
                + "\"\n WARN: file is not a pair\n"
            )


def main():
    """Scan ``sourcePath`` and write the findings to a timestamped log."""
    # Record the start time.
    start_time = time.perf_counter()
    log_path = datetime.now().strftime("exception-%Y-%m-%d_%H.%M.%S") + ".log"
    # The context manager closes the log even if the scan raises midway.
    with open(log_path, "a", encoding="utf-8") as log:
        check_tree(sourcePath, log)
    print("✅ Job is done.\n")
    elapsed = round(time.perf_counter() - start_time, 2)
    # Format the unit directly instead of emitting a backspace character to
    # hide the separator that print() inserts between arguments.
    print(f"🔴 Running Time: {elapsed}s")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment