Performance Benchmarks

This notebook measures rustfuzz performance using Python's timeit and visualises results with plotly.

All benchmarks run N = 10,000 iterations on a fixed string pair to produce stable measurements.

import timeit

import rustfuzz.fuzz as fuzz
from rustfuzz.distance import DamerauLevenshtein, JaroWinkler, Levenshtein

N = 10_000
S1, S2 = (
    "the quick brown fox jumps over the lazy dog",
    "the quick brown fox jumped over a lazy dog",
)

print(f"Strings:\n  s1 = {S1!r}\n  s2 = {S2!r}\n")
print(f"Iterations: {N:,}")

def bench(fn, *args, n=N) -> float:
    """Return milliseconds per call (median of 5 runs)."""
    times = timeit.repeat(lambda: fn(*args), number=n, repeat=5)
    return min(times) / n * 1000  # ms per call


benchmarks = {
    "fuzz.ratio": lambda: fuzz.ratio(S1, S2),
    "fuzz.partial_ratio": lambda: fuzz.partial_ratio(S1, S2),
    "fuzz.token_sort_ratio": lambda: fuzz.token_sort_ratio(S1, S2),
    "fuzz.token_set_ratio": lambda: fuzz.token_set_ratio(S1, S2),
    "fuzz.WRatio": lambda: fuzz.WRatio(S1, S2),
    "Levenshtein.distance": lambda: Levenshtein.distance(S1, S2),
    "Levenshtein.normalized": lambda: Levenshtein.normalized_similarity(S1, S2),
    "JaroWinkler.similarity": lambda: JaroWinkler.similarity(S1, S2),
    "DamerauLevenshtein.dist": lambda: DamerauLevenshtein.distance(S1, S2),
}

results: dict[str, float] = {}

for name, fn in benchmarks.items():
    ms = bench(fn)
    results[name] = ms
    print(f"  {name:35}  {ms * 1000:.3f} μs/call")

print("\n✅ All benchmarks complete")

Results — Bar Chart

try:
    import plotly.graph_objects as go

    ops = list(results.keys())
    times = [v * 1000 for v in results.values()]  # μs

    colors = [
        f"rgba({int(168 + i * 4)},{int(85 - i * 2)},{int(247 - i * 10)},0.85)"
        for i in range(len(ops))
    ]

    fig = go.Figure(
        go.Bar(
            x=ops,
            y=times,
            marker_color=colors,
            text=[f"{t:.2f} μs" for t in times],
            textposition="outside",
        )
    )
    fig.update_layout(
        title="rustfuzz — microseconds per call (lower is better)",
        xaxis_title="Operation",
        yaxis_title="μs / call",
        paper_bgcolor="#0f0319",
        plot_bgcolor="#1a0533",
        font=dict(color="#d8b4fe"),
        xaxis=dict(tickangle=-30),
    )
    fig.show()

except ImportError:
    print("Install plotly: uv pip install plotly")
    for name, ms in results.items():
        bar = "█" * int(ms * 1000 / max(results.values()) * 40)
        print(f"  {name:35} {bar} {ms * 1000:.2f} μs")

Scaling benchmark — string length

How does Levenshtein.distance scale with string length?

import random
import string

random.seed(42)


def rand_str(n: int) -> str:
    return "".join(random.choices(string.ascii_lowercase, k=n))


lengths = [10, 50, 100, 250, 500, 1000]
scale_results: dict[int, float] = {}

for length in lengths:
    a, b = rand_str(length), rand_str(length)
    ms = bench(Levenshtein.distance, a, b, n=1000)
    scale_results[length] = ms * 1000  # μs
    print(f"  len={length:5d}  {ms * 1000:.3f} μs/call")

try:
    import plotly.graph_objects as go

    fig = go.Figure(
        go.Scatter(
            x=list(scale_results.keys()),
            y=list(scale_results.values()),
            mode="lines+markers",
            line=dict(color="#a855f7", width=3),
            marker=dict(color="#22c55e", size=10),
        )
    )
    fig.update_layout(
        title="Levenshtein.distance — scaling by string length",
        xaxis_title="String length (chars)",
        yaxis_title="μs / call",
        paper_bgcolor="#0f0319",
        plot_bgcolor="#1a0533",
        font=dict(color="#d8b4fe"),
    )
    fig.show()

except ImportError:
    for length, us in scale_results.items():
        print(f"  len={length:4d}  {us:.3f} μs")