Code Examples for speechtotext

This page is a collection of examples showing how to use the package.

Full usage example

Note

The .env file might have to be loaded before running the benchmarks.

from dotenv import load_dotenv
# Load the .env file up front so its variables are available to the benchmarks.
load_dotenv()

Use this module like this:

# Imports
import speechtotext
from speechtotext.datasets import Dataset
from speechtotext.benchmark.customBenchmarks import *
from speechtotext.benchmark.benchmarks import run_benchmarks, Benchmark
from speechtotext.functions import force_cudnn_initialization
from speechtotext.plot.plotting import Plotting

import speechtotext.metric.customMetrics
force_cudnn_initialization()

# Load dataset
dataset = Dataset(path_to_dir="path/to/dataset", name="dataset_name")

# Settings to run benchmarks
number_of_samples = 100
report_name = "benchmark_name"

# The list holds the benchmark classes themselves (not instances),
# so the element type is type[Benchmark].
benchmark_class_list: list[type[Benchmark]] = [WhisperBenchmark, WhisperAPIBenchmark]

# Run benchmarks
results = run_benchmarks(benchmark_class_list, dataset, number_of_samples, report_name)

# Create plots
plotting = Plotting(results=results, errors=Benchmark.ERROR_LIST, report_name=report_name)
plotting.save_all()

Add new model wrapper and benchmark

To add a new model to the benchmarks, the following classes need to be created:

  • ModelWrapper

  • ModelVersion

  • Benchmark

Create new ModelWrapper

from speechtotext.model.modelWrapper import *
from speechtotext.functions import load_env_variable

# Model version
class CustomModelVersion(ModelVersion):
        """Versions of the custom model that a wrapper can be created for."""

        MODEL_VERSION_1 = "version_1"
        MODEL_VERSION_2 = "version_2"
        MODEL_VERSION_ENHANCED = "enhanced"

# Model wrapper
class CustomModelWrapper(ModelWrapper):
        """Example wrapper that makes a custom speech-to-text model usable in a benchmark."""

        LANGUAGE_CODE: str = 'nl'

        def __init__(self, model_version: CustomModelVersion):
                """Hand the chosen version to ModelWrapper.

                The annotation narrows the accepted versions to CustomModelVersion.
                """
                super().__init__(model_version)

        def get_model(self):
                """Load the model, or prepare whatever an API call needs."""
                self.API_KEY = load_env_variable("MODEL_API_KEY")
                # NOTE(review): `model` is not defined in this example -- presumably a
                # placeholder for the constructor of your own model or API client.
                self.model = model()

        def get_transcript_of_file(self, audio_file_name: str) -> str:
                """Transcribe ``audio_file_name`` and return the ``"text"`` entry of the result."""
                return self.model.transcribe(self.API_KEY, audio_file_name)["text"]

Create new Benchmark

from speechtotext.benchmark.benchmarks import *
from speechtotext.model.modelWrapper import ModelWrapper

class CustomModelBenchmark(Benchmark):
        """Benchmark that covers every version of the custom model."""

        MODEL_BASE = "Custom model name"

        def create_models(self) -> list[ModelWrapper]:
                """Return one CustomModelWrapper for each CustomModelVersion member."""
                return [CustomModelWrapper(version) for version in CustomModelVersion]

Use custom benchmarks

# Imports
import speechtotext
from speechtotext.datasets import Dataset
from speechtotext.benchmark.customBenchmarks import *
from speechtotext.benchmark.benchmarks import run_benchmarks, Benchmark
from speechtotext.functions import force_cudnn_initialization
from speechtotext.plot.plotting import Plotting

import speechtotext.metric.customMetrics
force_cudnn_initialization()

# Load dataset
dataset = Dataset(path_to_dir="path/to/dataset", name="dataset_name")

# Settings to run benchmarks
number_of_samples = 100
report_name = "benchmark_name"

# The list holds the benchmark classes themselves (not instances),
# so the element type is type[Benchmark].
benchmark_class_list: list[type[Benchmark]] = [
        WhisperBenchmark,
        WhisperAPIBenchmark,
        CustomModelBenchmark,
]

# Run benchmarks
results = run_benchmarks(benchmark_class_list, dataset, number_of_samples, report_name)

# Create plots
plotting = Plotting(results=results, errors=Benchmark.ERROR_LIST, report_name=report_name)
plotting.save_all()