Skip to content

Instantly share code, notes, and snippets.

@Davidnet
Last active February 7, 2023 16:52
Show Gist options
  • Save Davidnet/1668b17fe5216f175c09493e0ffb600a to your computer and use it in GitHub Desktop.
Save Davidnet/1668b17fe5216f175c09493e0ffb600a to your computer and use it in GitHub Desktop.
Test Kubeflow
from pathlib import Path
def download_videos(video_path: Path, audio_files: Path):
    """Download the audio track of each listed YouTube video and archive them.

    Reads one YouTube video ID per line from ``video_path``, downloads the
    first ``audio/mp4`` stream of every video into a temporary directory and
    packs that directory into a gzip-compressed tarball. Videos whose URL is
    rejected by pytube, or that expose no ``audio/mp4`` stream, are reported
    on stdout and skipped.

    Args:
        video_path: Text file containing one YouTube video ID per line.
            Blank lines are ignored.
        audio_files: Base name of the resulting archive. Note that
            ``shutil.make_archive`` appends the format extension itself, so
            the archive ends up at ``<audio_files>.tar.gz``.
    """
    # NOTE(review): imports are function-local, presumably so the function
    # stays self-contained when used as a Kubeflow component - confirm.
    from shutil import make_archive
    import tempfile

    from pytube import YouTube
    from pytube.exceptions import RegexMatchError

    with open(video_path.resolve(), "r", encoding="utf-8") as f:
        # Drop blank lines / stray whitespace so they do not turn into a
        # bogus "https://youtu.be/" request.
        video_ids = [line.strip() for line in f if line.strip()]

    # The context manager guarantees the scratch directory is removed even
    # if a download or the archiving step raises.
    with tempfile.TemporaryDirectory() as tmpdir:
        for video_id in video_ids:
            url = f"https://youtu.be/{video_id}"
            try:
                yt = YouTube(url)
            except RegexMatchError:
                print(f"RegexMatchError for '{url}'")
                continue

            # Only audio streams are of interest; take the first one served
            # as "audio/mp4".
            audio_stream = next(
                (
                    candidate
                    for candidate in yt.streams.filter(only_audio=True)
                    if candidate.mime_type == "audio/mp4"
                ),
                None,
            )
            if audio_stream is None:
                # Just in case no matching audio stream exists (shouldn't happen).
                print("NO MP3 AUDIO FOUND")
                continue

            # The ".mp3" file name is kept for compatibility with existing
            # consumers, although the selected stream is "audio/mp4".
            audio_stream.download(output_path=tmpdir, filename=f"{video_id}.mp3")

        # Archive while the temporary directory still exists.
        make_archive(str(audio_files.resolve()), "gztar", tmpdir)
from google.cloud import aiplatform
from google.oauth2.credentials import Credentials
# Submit the compiled pipeline definition ("pipeline.yaml") as a Vertex AI
# pipeline job and run it.
# NOTE(review): TOKEN is not defined in the visible source - presumably an
# OAuth2 access token supplied elsewhere; confirm before running.
PROJECT = "mlops-explorations"
creds = Credentials(TOKEN)

# Configure the SDK defaults for this project.
aiplatform.init(credentials=creds, project=PROJECT)

job = aiplatform.PipelineJob(
    project=PROJECT,
    location="us-central1",
    credentials=creds,
    display_name="Kubeflow Test Knowledge Pipeline",
    template_path="pipeline.yaml",
    # Placeholder value; the real video location is still to be decided.
    parameter_values={"video_location": "TBD"},
)
job.run()
https://www.youtube.com/watch?v=qDKSH_X68XE https://www.youtube.com/watch?v=Rqah6F0D6-Y https://www.youtube.com/watch?v=pk9ptAkR84k https://www.youtube.com/watch?v=8nIhUx9rnOU https://www.youtube.com/watch?v=1PKH_D6zjoM
@Davidnet
Copy link
Author

Davidnet commented Feb 7, 2023

Python

qDKSH_X68XE
Rqah6F0D6-Y
pk9ptAkR84k
8nIhUx9rnOU
1PKH_D6zjoM

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment