We’ll write a basic function which takes in a YouTube video URL, uses the youtube_dl library to download the video as an Output, and runs the video through Whisper to generate a text transcript.
run.py
import whisper
import youtube_dl


def transcribe(**inputs):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Args:
        **inputs: Keyword arguments passed from the API. Must include
            ``video_url``, the URL of the YouTube video to transcribe.

    Returns:
        dict: ``{"pred": <transcript text>}``.

    Raises:
        KeyError: If ``video_url`` is not present in ``inputs``.
    """
    # Grab the video URL passed from the API
    video_url = inputs["video_url"]

    # Download only the audio stream into the current directory. The file is
    # named after the video id so the path stays filesystem-safe regardless
    # of what characters the video title contains.
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "./%(id)s.%(ext)s",
        "noplaylist": True,  # a watch URL may carry a playlist id; fetch one video only
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(video_url, download=True)
        # Resolve the output template to the path the download was written to.
        audio_path = ydl.prepare_filename(info)

    # Load Whisper and transcribe the audio. The file is passed with its
    # native extension; renaming it to ".mp3" would not change its contents.
    model = whisper.load_model("small")
    result = model.transcribe(audio_path)
    print(result["text"])

    return {"pred": result["text"]}


if __name__ == "__main__":
    video_url = "https://www.youtube.com/watch?v=adJFT6_j9Uk&ab_channel=minutephysics"
    transcribe(video_url=video_url)
{ "pred": " Welcome to the Pets Show. That is, Physics Explained in 10 Seconds. For the next month, in addition to Minute Physics, I'll be making one 10-second video every day. 10 Seconds of Physics explaining 5 seconds of titles on either end."}