Skip to content

Instantly share code, notes, and snippets.

View jeanmidevacc's full-sized avatar

Jean-Michel Daignan jeanmidevacc

View GitHub Profile
@jeanmidevacc
jeanmidevacc / build_timecodes_based_on_silence.py
Created January 28, 2024 21:38
build_timecodes_based_on_silence
from pydub import AudioSegment, silence
import pandas as pd
# Detect the silent stretches of `audio` and set up the bounds that are used
# to cut it into segments.
#
# Parameters:
#   audio          -- audio object passed to silence.detect_silence; it must
#                     expose `duration_seconds`.
#   length_segment -- segment length; it is multiplied by 60 * 1000 below, so
#                     presumably minutes converted to milliseconds (confirm).
#   dbfs           -- reference loudness; the silence threshold is dbfs - 16.
#
# NOTE(review): only the beginning of this function is visible in this copy
# (nothing is returned yet) and the body indentation has been lost.
def build_segments(audio, length_segment=10, dbfs=0):
# Silent ranges found with min_silence_len=1000 (presumably milliseconds,
# like the * 1000 conversions below -- confirm against pydub).
silences = silence.detect_silence(audio, min_silence_len=1000, silence_thresh=dbfs-16)
# One row per silent range, with its start and end timecode.
dfp_silences = pd.DataFrame(silences, columns = ["start_timecode", "end_timecode"])
# Segment length after the 60 * 1000 conversion.
threshold_segment = int(length_segment * 60 * 1000)
first_timecode = 0
# Total duration of the audio: seconds converted to milliseconds.
last_timecode = int(audio.duration_seconds * 1000)
@jeanmidevacc
jeanmidevacc / openai_whisper.py
Created January 28, 2024 20:20
openai_whisper
from pathlib import Path
from openai import OpenAI
import os

# Shared OpenAI client. The key is read from the OPENAI_API_KEY environment
# variable instead of being written into the source, so a real secret never
# ends up in a shared snippet or in version control.
client_openai = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Presumably transcribes the audio file `file` through the OpenAI API, with
# `language` defaulting to "fr" -- inferred from the name and signature only.
# NOTE(review): the body is not visible in this copy; only the commented-out
# line below remains, so the actual behaviour cannot be confirmed from here.
def get_transcript_openai_api(file, language="fr"):
# f = open(file, "rb")
@jeanmidevacc
jeanmidevacc / local_hf_whisper.py
Created January 28, 2024 18:16
local_hf_whisper
import torch
from transformers import pipeline
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Hugging Face checkpoint name -> bare model size.
mapping = {
    "whisper-tiny": "tiny",
    "whisper-small": "small",
    "whisper-medium": "medium",
    "whisper-base": "base",
}

hf_model_name = "whisper-medium"
size_model = mapping[hf_model_name]  # one of: tiny, base, small, medium
# Build the transformers pipeline object used as the model.
# NOTE(review): the call is cut off in this copy -- its arguments are not
# visible, so which task/checkpoint/device it receives cannot be confirmed.
model = pipeline(
@jeanmidevacc
jeanmidevacc / local_whisper.py
Last active January 28, 2024 16:04
local_whisper.py
import whisper
size_model = "medium"  # the type of model in the model card, with .en or not

# No explicit device: whisper.load_model then picks CUDA when it is available
# and falls back to the CPU otherwise, so this also loads on machines without
# a GPU instead of failing on a hard-coded "cuda".
model = whisper.load_model(size_model)
# Decode the audio file `file` with an already loaded whisper `model`, using
# `language` as the decoding language.
#
# Steps visible here: load the audio, pass it through whisper.pad_or_trim,
# compute its log-mel spectrogram on the model's device, then decode it.
#
# NOTE(review): `result` is not returned in the visible part -- the function
# looks cut off in this copy (and its body indentation has been lost).
def get_transcript_local_whisper(model, file, language):
audio = whisper.load_audio(file)
# pad_or_trim presumably fits the audio to the fixed input length the model
# expects -- confirm against the whisper documentation.
audio = whisper.pad_or_trim(audio)
# The spectrogram is moved to the same device as the model before decoding.
mel = whisper.log_mel_spectrogram(audio).to(model.device)
result = whisper.decode(model, mel, language=language)
from time import time
from hyperopt import fmin, tpe, hp, anneal, Trials
import mlflow
from sklearn.metrics import mean_squared_error
import surprise
def evaluate_model(model, dfp_ratings_test):
    """Return the RMSE of ``model`` on a held-out ratings table.

    Args:
        model: Model forwarded unchanged to ``compute_ranking``.
        dfp_ratings_test: DataFrame with ``userid``, ``contentid`` and
            ``rating`` columns. It is copied, never modified.

    Returns:
        The root mean squared error between the ``rating`` column and the
        rating predicted for each (userid, contentid) pair.
    """
    dfp_evaluation = dfp_ratings_test.copy()
    # Ids are cast to str before prediction, as compute_ranking is always
    # called with string ids in this file.
    dfp_evaluation["rating_predicted"] = dfp_evaluation.apply(
        lambda row: compute_ranking(model, str(row["userid"]), str(row["contentid"])),
        axis=1,
    )
    # `squared=False` is deprecated and removed in recent scikit-learn
    # releases; taking the square root of the MSE gives the same value on
    # every version.
    mse = mean_squared_error(
        dfp_evaluation["rating"].tolist(),
        dfp_evaluation["rating_predicted"].tolist(),
    )
    return mse ** 0.5
# For every archetype user, collect candidate items from the neighbours of
# the items found in that user's positive inventory.
# NOTE(review): indentation was lost in this copy, so where each loop body
# ends is not visible here; the snippet also appears to continue past the
# last line shown.
for idx, row in dfp_archetypes.iterrows():
print("ARCHETYPE:", row["userid"])
# Entry of dfp_inventory_positive stored under this userid.
inventory_positive = dfp_inventory_positive.loc[row["userid"]]
# Get the candidates
buffer = []
for contentid in inventory_positive:
# The 10 closest items to this inventory item, appended to the candidate pool.
closest_contentids = get_closest_neighbors(model_retriever_items, contentid, 10, type_="item")
buffer.extend(closest_contentids)
# Number of times each candidate was retrieved, most frequent first.
sp_count_contentids = pd.Series(dict(Counter(buffer))).sort_values(ascending=False)
def get_closest_neighbors(model, entityid, k, type_="item"):
    """Return the raw ids of the ``k`` nearest neighbours of an entity.

    Args:
        model: Fitted model exposing ``trainset`` (raw <-> inner id
            conversion) and ``get_neighbors(inner_id, k)``.
        entityid: Raw id of the item or user to look up.
        k: Number of neighbours to retrieve.
        type_: ``"item"`` to treat ``entityid`` as an item id; any other
            value treats it as a user id.

    Returns:
        A list of raw ids, in the order given by ``model.get_neighbors``.
    """
    trainset = model.trainset
    # Pick the matching pair of converters once, so the way back to raw ids
    # always mirrors the way in. Previously the user case resolved the inner
    # id but then returned None instead of the neighbours.
    if type_ == "item":
        to_inner, to_raw = trainset.to_inner_iid, trainset.to_raw_iid
    else:
        to_inner, to_raw = trainset.to_inner_uid, trainset.to_raw_uid

    closest_inner_ids = model.get_neighbors(to_inner(entityid), k)
    return [to_raw(inner_id) for inner_id in closest_inner_ids]
def build_recommendations(model, userid, inventory, dfp_items, k=5):
    """Return the ``k`` items with the highest predicted rating for a user.

    Args:
        model: Model forwarded unchanged to ``compute_ranking``.
        userid: Id of the user; converted to ``str`` before prediction.
        inventory: Collection of content ids (compared as strings) that must
            not be recommended.
        dfp_items: DataFrame with ``title``, ``category``, ``year`` and
            ``contentid`` columns. It is not modified.
        k: Maximum number of rows to return.

    Returns:
        A DataFrame with the four item columns plus ``rating_predicted``,
        sorted by descending predicted rating, with a fresh 0-based index.
    """
    # Work on an explicit copy: writing new columns into a column slice of
    # dfp_items triggers pandas' chained-assignment warning and may or may
    # not write through to the caller's frame.
    dfp_recommendations = dfp_items[["title", "category", "year", "contentid"]].copy()
    dfp_recommendations["contentid"] = dfp_recommendations["contentid"].astype(str)

    # Drop the items of the inventory before scoring, so no prediction is
    # computed for rows that would be thrown away afterwards.
    in_inventory = dfp_recommendations["contentid"].isin(inventory)
    dfp_recommendations = dfp_recommendations.loc[~in_inventory].copy()

    dfp_recommendations["rating_predicted"] = dfp_recommendations["contentid"].apply(
        lambda contentid: compute_ranking(model, str(userid), str(contentid))
    )
    # Stable sort: items with equal predicted ratings keep their dfp_items order.
    dfp_recommendations = dfp_recommendations.sort_values(
        "rating_predicted", ascending=False, kind="mergesort"
    )
    return dfp_recommendations.head(k).reset_index(drop=True)
import pandas as pd
from kats import models
# Select your base model
# Build a Kats forecasting model for the time series `kts`, selected by name.
#
# Parameters:
#   model -- name of the model; the branches visible here are "prophet",
#            "theta" and "holtwinters".
#   kts   -- time series handed to the model constructor.
#
# Each visible branch instantiates the model with that model's default
# parameter object.
# NOTE(review): the chain is cut off right after the "holtwinters" branch
# header in this copy, so that branch and any later ones are not visible.
def build_model(model, kts):
if model == "prophet":
return models.prophet.ProphetModel(kts, params=models.prophet.ProphetParams())
elif model == "theta":
return models.theta.ThetaModel(kts, params=models.theta.ThetaParams())
elif model == "holtwinters":
from kats.consts import TimeSeriesData
def build_kats_timeserie(dfp, column_time = "time", column_value = "value"):
    """Wrap two columns of ``dfp`` into a Kats ``TimeSeriesData``.

    Args:
        dfp: Source table, indexable by column name.
        column_time: Name of the column used as the time axis.
        column_value: Name of the column used as the values.

    Returns:
        The ``TimeSeriesData`` built from those two columns.
    """
    timestamps = dfp[column_time]
    observations = dfp[column_value]
    return TimeSeriesData(time=timestamps, value=observations)
# Test split as a Kats time series: "date" is the time axis, "value" the values.
kts_test = build_kats_timeserie(dfp_test, column_time="date", column_value="value")