Skip to content

Instantly share code, notes, and snippets.

@jeanmidevacc
Created January 28, 2024 21:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeanmidevacc/74f9c9826898b5e4ecec09fabcc91104 to your computer and use it in GitHub Desktop.
Save jeanmidevacc/74f9c9826898b5e4ecec09fabcc91104 to your computer and use it in GitHub Desktop.
build_timecodes_based_on_silence
from pydub import AudioSegment, silence
import pandas as pd
def build_segments(audio, length_segment=10, dbfs=0):
    """Split an audio track into long segments whose boundaries fall on silences.

    Silences are detected with ``silence.detect_silence``. A segment is closed
    at the start of the first silence encountered once at least
    ``length_segment`` minutes have elapsed since the segment began; the next
    segment starts where that silence ends. The last segment is always
    stretched to the end of the audio.

    Args:
        audio: Audio object accepted by ``silence.detect_silence`` and exposing
            ``duration_seconds`` (e.g. a pydub ``AudioSegment``).
        length_segment: Minimum length of a segment, in minutes.
        dbfs: Reference loudness; ``dbfs - 16`` is passed as ``silence_thresh``.

    Returns:
        A ``pandas.DataFrame`` with columns ``start_timecode`` and
        ``end_timecode`` (milliseconds). If no silence-delimited segment
        reaches the requested length, a single segment covering the whole
        audio is returned.
    """
    silences = silence.detect_silence(
        audio, min_silence_len=1000, silence_thresh=dbfs - 16
    )
    threshold_segment = int(length_segment * 60 * 1000)
    last_timecode = int(audio.duration_seconds * 1000)

    segments = []
    first_timecode = 0
    for silence_start, silence_end in silences:
        if silence_start - first_timecode >= threshold_segment:
            segments.append([first_timecode, silence_start])
            # Skip the silence itself: the next segment starts when sound resumes.
            first_timecode = silence_end

    if segments:
        # Absorb the trailing remainder into the last segment so that the
        # segments reach the end of the audio.
        segments[-1][1] = last_timecode
    else:
        # No cut point was found (short audio or no usable silence): fall back
        # to one segment spanning the full track instead of failing on an
        # empty result.
        segments.append([0, last_timecode])

    return pd.DataFrame(segments, columns=["start_timecode", "end_timecode"])
# Example usage: load an MP3 file and compute its segment timecodes.
# NOTE(review): dbfs is left at its default of 0 here, so silence_thresh is
# -16; passing dbfs=audio.dBFS may be what was intended — confirm.
audio = AudioSegment.from_mp3("location_of_file.mp3")
dfp_timecodes = build_segments(audio, length_segment=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment