34 Whisper AI - General Purpose CPU Method
Whisper AI from OpenAI is a powerful tool for automatic transcription and translation of audio files.
Many users need to carry out transcription and translation in a secure context, i.e. on a personal or work laptop. Such users are less likely to have access to a high-powered GPU and so may need to use the CPU for compute. This can be slow, but it works reasonably well if you have some time on your hands.
This Python script provides a simple method for running Whisper AI on a laptop's CPUs. It works on Macs, which currently can't run this kind of analysis on the GPU because the torch package doesn't yet support Apple's Metal framework for GPU-based computation.
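Note that this may change between torch releases. If you want to check what your own installation reports before falling back to the CPU, here's a quick sketch using the `torch.backends.mps` flags exposed by recent torch versions:

```python
import torch

# Report whether this torch build can use Apple's Metal (MPS) backend.
# If either flag is False, the CPU fallback used in the script below
# is the safe option.
print("MPS built into this install:", torch.backends.mps.is_built())
print("MPS available on this machine:", torch.backends.mps.is_available())
```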
```python
import whisper
import sys
import os
import ffmpeg
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor


def get_audio_duration(file_path):
    """Return the duration of the audio file in seconds using FFmpeg."""
    try:
        probe = ffmpeg.probe(file_path)
        return float(probe["format"]["duration"])
    except Exception as e:
        print(f"Error getting audio duration: {e}")
        return None


def transcribe_segment(segment):
    """Return a (start_time, text) pair for one Whisper segment."""
    return segment["start"], segment["text"]


def transcribe_audio(file_path):
    """Transcribe an audio file with Whisper, reporting segment-level progress."""
    print(f"🎤 Processing: {file_path}")
    audio_duration = get_audio_duration(file_path)
    if audio_duration is None:
        print("⚠️ Could not determine audio duration. Progress will be approximate.")

    results = []

    # ✅ Run Whisper transcription (this is the slow, CPU-bound step;
    # fp16=False because half precision is not supported on CPU)
    with tqdm(total=audio_duration, unit=" sec", dynamic_ncols=True) as pbar:
        result = model.transcribe(file_path, verbose=False, fp16=False)

        # ✅ Gather the per-segment results in a process pool; the transcription
        # itself has already run above, so the pool just collects the segments.
        # Adjust max_workers to suit your machine.
        with ProcessPoolExecutor(max_workers=4) as executor:
            future_to_segment = {executor.submit(transcribe_segment, seg): seg for seg in result["segments"]}
            for future in future_to_segment:
                start, text = future.result()
                results.append((start, text))

                # ✅ Update progress bar using the segment's start timestamp
                pbar.update(start - pbar.n)
                pbar.set_description("⏳ Transcribing...")

    # ✅ Sort results by start time
    results.sort()

    # ✅ Save transcript
    transcript_path = file_path + ".txt"
    with open(transcript_path, "w") as f:
        f.write(" ".join(text for _, text in results))

    print(f"\n✅ Transcription complete! Saved to: {transcript_path}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python whisper_transcribe.py <audio_file>")
        sys.exit(1)

    file_path = sys.argv[1]
    if not os.path.isfile(file_path):
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)

    # ✅ Force CPU execution
    device = "cpu"
    print(f"Using device: {device} (MPS not fully supported yet)")

    # Load the Whisper model on CPU using the tiny checkpoint. Doing this
    # inside the __main__ guard stops the spawned worker processes from
    # reloading the model when they re-import this module.
    model = whisper.load_model("tiny").to(device)

    transcribe_audio(file_path)
```
To run this you simply type `python whisper_transcribe.py <audio_file>` and let it run.
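The script pins the tiny model for speed. If you can spare more CPU time, any of the larger standard Whisper checkpoints can be swapped into the same `whisper.load_model` call; here's a minimal sketch (the filename is just a placeholder):

```python
import whisper

# Standard Whisper checkpoints, roughly in ascending accuracy and CPU cost:
# "tiny", "base", "small", "medium", "large"
model = whisper.load_model("small")

# fp16=False avoids the half-precision warning when running on CPU
result = model.transcribe("my_recording.mp3", fp16=False, verbose=False)
print(result["text"])
```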
You'll need to install the required packages before you begin (on PyPI these are `openai-whisper`, `ffmpeg-python`, and `tqdm`; you'll also need the FFmpeg binary itself on your system). Full examples are available at this repo:
https://github.com/chrissyhroberts/WHISPER_AI_TRANSCRIPTS_GENERAL_PURPOSE
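As mentioned above, Whisper can also translate speech into English rather than transcribing it in the source language: the same `transcribe` call accepts a `task` option. A minimal sketch, again with a placeholder filename:

```python
import whisper

model = whisper.load_model("tiny")

# task="translate" asks Whisper to output English regardless of the spoken
# language; the default, task="transcribe", keeps the original language.
result = model.transcribe("interview_french.mp3", task="translate", fp16=False)
print(result["text"])
```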