# Colab helper methods
# Clear console output, e.g., large import-statement logs
from IPython.display import clear_output
clear_output(wait=True)
# Show an inline audio player
from IPython.display import Audio, display
from pydub import AudioSegment

audio = AudioSegment.from_file(audio_path)  # load with pydub if the audio needs editing first
display(Audio("path.wav"))
---------------------------------------------------
# Packages needed for audio processing
!pip install yt-dlp
!pip install pydub
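# Illustrative yt-dlp usage (not in the original gist): extract a video's
# audio track and convert it to WAV via ffmpeg; replace VIDEO_URL with a real URL.
!yt-dlp -x --audio-format wav -o "%(id)s.%(ext)s" VIDEO_URL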
------------------------------------------------------------------------------------------------
# Create directory if it doesn't exist
import os
import shutil

os.makedirs(base_path, exist_ok=True)

def create_clean_directory(directory_path):
    # Remove the directory if it exists, then recreate it empty
    if os.path.exists(directory_path):
        shutil.rmtree(directory_path)
    os.makedirs(directory_path, exist_ok=True)
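# Example usage (same path as the TTS snippet below):
create_clean_directory(f"{base_path}/audio_segments")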
# List files in a directory
import glob

segment_audio_paths = glob.glob(f"{directory}/*")  # list all files in the directory
print("segment_audio_paths", segment_audio_paths)

# Loop through all MP3 files in the folder (glob already returns full paths)
for file_path in segment_audio_paths:
    if file_path.endswith(".mp3"):
        pass  # process file_path here
--------------------------------------------------------------------------------------------
# Load a JSON file
import json

with open("file.txt", 'r', encoding='utf-8') as f:
    result = json.load(f)
result['segments'][:10]  # preview the first 10 segments
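# Each segment is a dict with 'start', 'end', and 'text' keys (matching the
# Whisper output saved below), so the fields can be read directly:
for segment in result['segments'][:3]:
    print(segment['start'], segment['end'], segment['text'])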
----------------------------------------------------------------------------------------------------
import time
import edge_tts

async def text_to_speech(text, index):
    voice_short_name = "hi-IN-MadhurNeural"
    rate_str = f"{0:+d}%"    # "+0%": default speaking rate
    pitch_str = f"{5:+d}Hz"  # "+5Hz": slightly raised pitch
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
    await communicate.save(f"{save_audio_path}/{index}_audio.mp3")
    return communicate

# Make the directory if it doesn't exist
save_audio_path = f"{base_path}/audio_segments"
os.makedirs(save_audio_path, exist_ok=True)

for index, text in enumerate(updated_segment_df['text_hindi']):
    # print(f"Processing {index}", text)
    await text_to_speech(text, index + 1)
    time.sleep(1)
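# Top-level `await` only works in notebooks/Colab. A minimal sketch for a
# plain Python script (assumes the same updated_segment_df and
# text_to_speech defined above):
import asyncio

async def main():
    for index, text in enumerate(updated_segment_df['text_hindi']):
        await text_to_speech(text, index + 1)
        await asyncio.sleep(1)  # non-blocking pause between requests

asyncio.run(main())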
----------------------------------------------------------------------------------------------------
# Extract a mel spectrogram from audio using librosa
import librosa
import numpy as np

def extract_mel_spectrogram(audio_path, n_mels=128, fmin=0, fmax=8000):
    # Load the audio file at its native sample rate
    y, sr = librosa.load(audio_path, sr=None)
    # Compute the mel spectrogram (newer librosa requires keyword arguments)
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmin=fmin, fmax=fmax)
    # Convert to log scale (log-mel spectrogram)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
    return log_mel_spectrogram
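# Quick visual check of the result (illustrative; assumes matplotlib, which
# Colab ships with, and an audio file at "path.wav"):
import matplotlib.pyplot as plt
import librosa.display

log_mel = extract_mel_spectrogram("path.wav")
librosa.display.specshow(log_mel, x_axis='time', y_axis='mel', fmax=8000)
plt.colorbar(format='%+2.0f dB')
plt.title('Log-mel spectrogram')
plt.show()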
---------------------------------------------------------
# Loop through a directory:
# convert all downloaded audio to 16 kHz mono WAV files
from pydub import AudioSegment

for file in os.listdir(playlist_path):
    if file.endswith('.wav'):
        file_path = os.path.join(playlist_path, file)
        audio = AudioSegment.from_wav(file_path)
        audio = audio.set_frame_rate(16000).set_channels(1)  # 16 kHz, mono
        audio.export(file_path, format='wav')
------------------------------------------------------
!pip install git+https://github.com/openai/whisper.git
!apt-get install -y ffmpeg
!pip install pydub

import whisper
from pydub import AudioSegment

# Load the Whisper model (choose 'tiny', 'base', 'small', 'medium', or 'large')
model = whisper.load_model("large")
result = model.transcribe(raw_audio_path, task="translate", language="zh")
result  # display the full transcription result

# Save the response to a raw file
import json

filter_data = {}
filter_data['text'] = result['text']
segments = []
for segment in result['segments']:
    segments.append({'start': segment['start'], 'end': segment['end'], 'text': segment['text']})
filter_data['segments'] = segments

with open("filename.txt", 'w', encoding='utf-8') as f:
    json.dump(filter_data, f, ensure_ascii=False, indent=4)
-------------------------------------------------------
# Google Speech
!pip install SpeechRecognition

# Experiment: chunk audio into 5-second chunks and try to match the
# spectrograms (see the chunking sketch after this snippet)
import speech_recognition as sr

def transcribe_audio_with_timestamps(audio_file):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)
    try:
        # show_all=True returns the full response dict, or an empty list if
        # nothing was recognized
        result = recognizer.recognize_google(audio, language="hi-IN", show_all=True)
        if isinstance(result, dict):
            return result['alternative'][0]['transcript']
        print("No transcription returned.")
    except sr.UnknownValueError:
        print("Speech Recognition could not understand audio.")
    except sr.RequestError:
        print("Could not request results.")

audio_path = "/content/drive/MyDrive/YoutubeDownloads/ProcessedAudioData/FYQWb2OBxY8/chunk_2.wav"
response = transcribe_audio_with_timestamps(audio_path)
print(response)
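# A minimal sketch of the 5-second chunking mentioned above (assumptions:
# pydub is installed and audio_path points to a WAV file; chunk filenames
# are illustrative):
from pydub import AudioSegment

audio = AudioSegment.from_wav(audio_path)
chunk_ms = 5 * 1000  # pydub indexes audio in milliseconds
for i, start in enumerate(range(0, len(audio), chunk_ms)):
    audio[start:start + chunk_ms].export(f"chunk_{i}.wav", format="wav")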