Skip to content

Cost

Cost calculation and budget enforcement.

Calculator

agentprobe.cost.calculator

Cost calculator for agent execution traces.

Loads pricing data from YAML files and computes per-call and per-trace costs based on token usage.

PricingEntry

Bases: BaseModel

Pricing for a single model.

Attributes:

Name Type Description
model str

Model identifier.

input_cost_per_1k float

Cost per 1,000 input tokens in USD.

output_cost_per_1k float

Cost per 1,000 output tokens in USD.

Source code in src/agentprobe/cost/calculator.py
class PricingEntry(BaseModel):
    """Token pricing record for one model.

    The model is configured for strict validation and rejects unknown
    fields; both cost fields are constrained to be non-negative.

    Attributes:
        model: Identifier of the model this price applies to.
        input_cost_per_1k: USD charged per 1,000 input tokens.
        output_cost_per_1k: USD charged per 1,000 output tokens.
    """

    # Unknown keys are an error and no type coercion is allowed.
    model_config = ConfigDict(extra="forbid", strict=True)

    model: str
    input_cost_per_1k: float = Field(ge=0.0)
    output_cost_per_1k: float = Field(ge=0.0)

PricingConfig

Bases: BaseModel

Collection of pricing entries.

Attributes:

Name Type Description
entries dict[str, PricingEntry]

Mapping of model name to pricing entry.

Source code in src/agentprobe/cost/calculator.py
class PricingConfig(BaseModel):
    """Lookup table of per-model pricing.

    Attributes:
        entries: Pricing entries keyed by model name.
    """

    model_config = ConfigDict(extra="forbid")

    entries: dict[str, PricingEntry] = Field(default_factory=dict)

    @classmethod
    def load_from_dir(cls, pricing_dir: str | Path | None = None) -> PricingConfig:
        """Build a pricing table from every ``*.yaml`` file in a directory.

        Files are read in sorted order, so when two files define the same
        model the later file wins. A file that fails to parse or validate
        is logged and skipped; entries already read from it are kept.

        Args:
            pricing_dir: Directory holding pricing YAML files. A falsy
                value selects the bundled default pricing directory.

        Returns:
            A PricingConfig holding every entry that loaded successfully
            (empty when the directory does not exist).
        """
        source_dir = _DEFAULT_PRICING_DIR if not pricing_dir else Path(pricing_dir)
        loaded: dict[str, PricingEntry] = {}

        if source_dir.is_dir():
            for path in sorted(source_dir.glob("*.yaml")):
                try:
                    document = yaml.safe_load(path.read_text(encoding="utf-8"))
                    # Only mapping documents can carry a "models" list.
                    if isinstance(document, dict):
                        for item in document.get("models", []):
                            if not isinstance(item, dict) or "model" not in item:
                                continue
                            parsed = PricingEntry.model_validate(item)
                            loaded[parsed.model] = parsed
                except Exception:
                    # Best effort: one bad file must not block the others.
                    logger.exception("Failed to load pricing from %s", path)
            logger.info("Loaded pricing for %d models", len(loaded))
        else:
            logger.warning("Pricing directory not found: %s", source_dir)

        return cls(entries=loaded)

load_from_dir(pricing_dir=None) classmethod

Load pricing data from all YAML files in a directory.

Parameters:

Name Type Description Default
pricing_dir str | Path | None

Directory containing pricing YAML files. Defaults to the bundled pricing_data directory.

None

Returns:

Type Description
PricingConfig

A PricingConfig with all entries loaded.

Source code in src/agentprobe/cost/calculator.py
@classmethod
def load_from_dir(cls, pricing_dir: str | Path | None = None) -> PricingConfig:
    """Build a pricing table from every ``*.yaml`` file in a directory.

    Files are read in sorted order, so when two files define the same
    model the later file wins. A file that fails to parse or validate
    is logged and skipped; entries already read from it are kept.

    Args:
        pricing_dir: Directory holding pricing YAML files. A falsy
            value selects the bundled default pricing directory.

    Returns:
        A PricingConfig holding every entry that loaded successfully
        (empty when the directory does not exist).
    """
    source_dir = _DEFAULT_PRICING_DIR if not pricing_dir else Path(pricing_dir)
    loaded: dict[str, PricingEntry] = {}

    if source_dir.is_dir():
        for path in sorted(source_dir.glob("*.yaml")):
            try:
                document = yaml.safe_load(path.read_text(encoding="utf-8"))
                # Only mapping documents can carry a "models" list.
                if isinstance(document, dict):
                    for item in document.get("models", []):
                        if not isinstance(item, dict) or "model" not in item:
                            continue
                        parsed = PricingEntry.model_validate(item)
                        loaded[parsed.model] = parsed
            except Exception:
                # Best effort: one bad file must not block the others.
                logger.exception("Failed to load pricing from %s", path)
        logger.info("Loaded pricing for %d models", len(loaded))
    else:
        logger.warning("Pricing directory not found: %s", source_dir)

    return cls(entries=loaded)

CostCalculator

Calculates costs for agent execution traces.

Uses pricing data to compute per-call costs, aggregates by model, and optionally enforces budget limits.

Attributes:

Name Type Description
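pricing PricingConfig

The pricing configuration (stored on the instance as the private attribute _pricing).

budget_limit_usd float | None

Optional maximum cost per trace (stored on the instance as the private attribute _budget_limit).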

Source code in src/agentprobe/cost/calculator.py
class CostCalculator:
    """Prices agent execution traces from per-model token rates.

    Each LLM call is priced from its token counts, results are grouped
    per model, and an optional per-trace budget is enforced.

    Attributes:
        pricing: The pricing configuration (held as ``_pricing``).
        budget_limit_usd: Optional maximum cost per trace (held as
            ``_budget_limit``).
    """

    def __init__(
        self,
        pricing: PricingConfig | None = None,
        budget_limit_usd: float | None = None,
    ) -> None:
        """Set up the calculator.

        Args:
            pricing: Pricing configuration. When falsy, the default
                pricing is loaded via ``PricingConfig.load_from_dir()``.
            budget_limit_usd: Optional budget limit in USD.
        """
        self._pricing = pricing if pricing else PricingConfig.load_from_dir()
        self._budget_limit = budget_limit_usd

    def calculate_llm_cost(self, call: LLMCall) -> float:
        """Price one LLM call.

        Args:
            call: The LLM call to price.

        Returns:
            Cost in USD; 0.0 (with a logged warning) when the call's
            model has no pricing entry.
        """
        price = self._pricing.entries.get(call.model)
        if price is not None:
            prompt_usd = (call.input_tokens / 1000.0) * price.input_cost_per_1k
            completion_usd = (call.output_tokens / 1000.0) * price.output_cost_per_1k
            return prompt_usd + completion_usd

        logger.warning("No pricing found for model: %s", call.model)
        return 0.0

    def calculate_trace_cost(self, trace: Trace) -> CostSummary:
        """Price every LLM call in a trace and aggregate per model.

        Tool cost is reported as 0.0, so the total cost equals the LLM
        cost.

        Args:
            trace: The execution trace to price.

        Returns:
            A CostSummary with per-model breakdown.

        Raises:
            BudgetExceededError: If a budget limit is set and the total
                LLM cost is strictly greater than it.
        """
        per_model: dict[str, dict[str, Any]] = {}

        for llm_call in trace.llm_calls:
            call_total = self.calculate_llm_cost(llm_call)
            price = self._pricing.entries.get(llm_call.model)
            if price is None:
                # Unpriced models still contribute token and call counts.
                prompt_usd, completion_usd = 0.0, 0.0
            else:
                prompt_usd = (llm_call.input_tokens / 1000.0) * price.input_cost_per_1k
                completion_usd = (llm_call.output_tokens / 1000.0) * price.output_cost_per_1k

            bucket = per_model.get(llm_call.model)
            if bucket is None:
                bucket = per_model[llm_call.model] = {
                    "input_tokens": 0,
                    "output_tokens": 0,
                    "input_cost_usd": 0.0,
                    "output_cost_usd": 0.0,
                    "total_cost_usd": 0.0,
                    "call_count": 0,
                }

            bucket["input_tokens"] += llm_call.input_tokens
            bucket["output_tokens"] += llm_call.output_tokens
            bucket["input_cost_usd"] += prompt_usd
            bucket["output_cost_usd"] += completion_usd
            bucket["total_cost_usd"] += call_total
            bucket["call_count"] += 1

        model_breakdowns = {}
        for name, totals in per_model.items():
            model_breakdowns[name] = CostBreakdown(model=name, **totals)

        rows = list(model_breakdowns.values())
        total_llm = sum(row.total_cost_usd for row in rows)
        total_input = sum(row.input_tokens for row in rows)
        total_output = sum(row.output_tokens for row in rows)

        # The summary is built before the budget check, as validation of
        # the summary itself happens first.
        summary = CostSummary(
            total_llm_cost_usd=total_llm,
            total_tool_cost_usd=0.0,
            total_cost_usd=total_llm,
            breakdown_by_model=model_breakdowns,
            total_input_tokens=total_input,
            total_output_tokens=total_output,
        )

        limit = self._budget_limit
        over_budget = limit is not None and total_llm > limit
        if over_budget:
            raise BudgetExceededError(total_llm, limit)

        return summary

__init__(pricing=None, budget_limit_usd=None)

Initialize the cost calculator.

Parameters:

Name Type Description Default
pricing PricingConfig | None

Pricing configuration. Loads defaults if None.

None
budget_limit_usd float | None

Optional budget limit in USD.

None
Source code in src/agentprobe/cost/calculator.py
def __init__(
    self,
    pricing: PricingConfig | None = None,
    budget_limit_usd: float | None = None,
) -> None:
    """Set up the calculator.

    Args:
        pricing: Pricing configuration. When falsy, the default
            pricing is loaded via ``PricingConfig.load_from_dir()``.
        budget_limit_usd: Optional budget limit in USD.
    """
    self._pricing = pricing if pricing else PricingConfig.load_from_dir()
    self._budget_limit = budget_limit_usd

calculate_llm_cost(call)

Calculate the cost of a single LLM call.

Parameters:

Name Type Description Default
call LLMCall

The LLM call to price.

required

Returns:

Type Description
float

Cost in USD.

Source code in src/agentprobe/cost/calculator.py
def calculate_llm_cost(self, call: LLMCall) -> float:
    """Price one LLM call.

    Args:
        call: The LLM call to price.

    Returns:
        Cost in USD; 0.0 (with a logged warning) when the call's
        model has no pricing entry.
    """
    price = self._pricing.entries.get(call.model)
    if price is not None:
        prompt_usd = (call.input_tokens / 1000.0) * price.input_cost_per_1k
        completion_usd = (call.output_tokens / 1000.0) * price.output_cost_per_1k
        return prompt_usd + completion_usd

    logger.warning("No pricing found for model: %s", call.model)
    return 0.0

calculate_trace_cost(trace)

Calculate the total cost for a trace.

Parameters:

Name Type Description Default
trace Trace

The execution trace to price.

required

Returns:

Type Description
CostSummary

A CostSummary with per-model breakdown.

Raises:

Type Description
BudgetExceededError

If budget_limit_usd is set and exceeded.

Source code in src/agentprobe/cost/calculator.py
def calculate_trace_cost(self, trace: Trace) -> CostSummary:
    """Price every LLM call in a trace and aggregate per model.

    Tool cost is reported as 0.0, so the total cost equals the LLM
    cost.

    Args:
        trace: The execution trace to price.

    Returns:
        A CostSummary with per-model breakdown.

    Raises:
        BudgetExceededError: If a budget limit is set and the total
            LLM cost is strictly greater than it.
    """
    per_model: dict[str, dict[str, Any]] = {}

    for llm_call in trace.llm_calls:
        call_total = self.calculate_llm_cost(llm_call)
        price = self._pricing.entries.get(llm_call.model)
        if price is None:
            # Unpriced models still contribute token and call counts.
            prompt_usd, completion_usd = 0.0, 0.0
        else:
            prompt_usd = (llm_call.input_tokens / 1000.0) * price.input_cost_per_1k
            completion_usd = (llm_call.output_tokens / 1000.0) * price.output_cost_per_1k

        bucket = per_model.get(llm_call.model)
        if bucket is None:
            bucket = per_model[llm_call.model] = {
                "input_tokens": 0,
                "output_tokens": 0,
                "input_cost_usd": 0.0,
                "output_cost_usd": 0.0,
                "total_cost_usd": 0.0,
                "call_count": 0,
            }

        bucket["input_tokens"] += llm_call.input_tokens
        bucket["output_tokens"] += llm_call.output_tokens
        bucket["input_cost_usd"] += prompt_usd
        bucket["output_cost_usd"] += completion_usd
        bucket["total_cost_usd"] += call_total
        bucket["call_count"] += 1

    model_breakdowns = {}
    for name, totals in per_model.items():
        model_breakdowns[name] = CostBreakdown(model=name, **totals)

    rows = list(model_breakdowns.values())
    total_llm = sum(row.total_cost_usd for row in rows)
    total_input = sum(row.input_tokens for row in rows)
    total_output = sum(row.output_tokens for row in rows)

    # The summary is built before the budget check, as validation of
    # the summary itself happens first.
    summary = CostSummary(
        total_llm_cost_usd=total_llm,
        total_tool_cost_usd=0.0,
        total_cost_usd=total_llm,
        breakdown_by_model=model_breakdowns,
        total_input_tokens=total_input,
        total_output_tokens=total_output,
    )

    limit = self._budget_limit
    over_budget = limit is not None and total_llm > limit
    if over_budget:
        raise BudgetExceededError(total_llm, limit)

    return summary

Budget Enforcer

agentprobe.cost.budget

Budget enforcement for test execution cost management.

Provides the BudgetEnforcer class for checking individual test and suite-level costs against configured budget limits.

BudgetEnforcer

Enforces cost budgets for tests and suites.

Checks actual costs against configured limits and returns verdict objects indicating whether budgets were exceeded.

Attributes:

Name Type Description
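test_budget_usd float | None

Maximum cost per individual test (stored on the instance as the private attribute _test_budget).

suite_budget_usd float | None

Maximum cost per test suite run (stored on the instance as the private attribute _suite_budget).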

Source code in src/agentprobe/cost/budget.py
class BudgetEnforcer:
    """Compares test and suite costs against optional budget limits.

    Each check returns a verdict object describing whether the cost
    stayed within the configured limit; nothing is raised on overrun.

    Attributes:
        test_budget_usd: Maximum cost per individual test (held as
            ``_test_budget``).
        suite_budget_usd: Maximum cost per test suite run (held as
            ``_suite_budget``).
    """

    def __init__(
        self,
        *,
        test_budget_usd: float | None = None,
        suite_budget_usd: float | None = None,
    ) -> None:
        """Store the configured limits.

        Args:
            test_budget_usd: Maximum cost per test in USD.
            suite_budget_usd: Maximum cost per suite in USD.
        """
        self._test_budget, self._suite_budget = test_budget_usd, suite_budget_usd

    @staticmethod
    def _check(actual: float, limit: float) -> BudgetCheckResult:
        """Build the verdict for one cost/limit pair.

        Args:
            actual: Actual cost in USD.
            limit: Budget limit in USD.

        Returns:
            A BudgetCheckResult; a cost equal to the limit counts as
            within budget.
        """
        # A non-positive limit reports 0% rather than dividing by it.
        if limit > 0:
            utilization = actual / limit * 100.0
        else:
            utilization = 0.0

        return BudgetCheckResult(
            within_budget=actual <= limit,
            actual_cost_usd=actual,
            budget_limit_usd=limit,
            remaining_usd=limit - actual,
            utilization_pct=round(utilization, 2),
        )

    def check_test(self, cost_summary: CostSummary) -> BudgetCheckResult | None:
        """Evaluate one test's total cost against the test budget.

        Args:
            cost_summary: Cost summary for the test.

        Returns:
            A BudgetCheckResult if a test budget is configured, else None.
        """
        budget = self._test_budget
        if budget is None:
            return None

        verdict = self._check(cost_summary.total_cost_usd, budget)
        if verdict.within_budget:
            return verdict

        logger.warning(
            "Test budget exceeded: $%.4f > $%.4f limit",
            cost_summary.total_cost_usd,
            budget,
        )
        return verdict

    def check_suite(self, cost_summaries: list[CostSummary]) -> BudgetCheckResult | None:
        """Evaluate the summed cost of a suite against the suite budget.

        Args:
            cost_summaries: Cost summaries for all tests in the suite.

        Returns:
            A BudgetCheckResult if a suite budget is configured, else None.
        """
        budget = self._suite_budget
        if budget is None:
            return None

        suite_total = sum(item.total_cost_usd for item in cost_summaries)
        verdict = self._check(suite_total, budget)
        if verdict.within_budget:
            return verdict

        logger.warning(
            "Suite budget exceeded: $%.4f > $%.4f limit",
            suite_total,
            budget,
        )
        return verdict

__init__(*, test_budget_usd=None, suite_budget_usd=None)

Initialize the budget enforcer.

Parameters:

Name Type Description Default
test_budget_usd float | None

Maximum cost per test in USD.

None
suite_budget_usd float | None

Maximum cost per suite in USD.

None
Source code in src/agentprobe/cost/budget.py
def __init__(
    self,
    *,
    test_budget_usd: float | None = None,
    suite_budget_usd: float | None = None,
) -> None:
    """Store the configured limits.

    Args:
        test_budget_usd: Maximum cost per test in USD.
        suite_budget_usd: Maximum cost per suite in USD.
    """
    self._test_budget, self._suite_budget = test_budget_usd, suite_budget_usd

check_test(cost_summary)

Check a single test's cost against the test budget.

Parameters:

Name Type Description Default
cost_summary CostSummary

Cost summary for the test.

required

Returns:

Type Description
BudgetCheckResult | None

A BudgetCheckResult if a test budget is configured, else None.

Source code in src/agentprobe/cost/budget.py
def check_test(self, cost_summary: CostSummary) -> BudgetCheckResult | None:
    """Evaluate one test's total cost against the test budget.

    Args:
        cost_summary: Cost summary for the test.

    Returns:
        A BudgetCheckResult if a test budget is configured, else None.
    """
    budget = self._test_budget
    if budget is None:
        return None

    verdict = self._check(cost_summary.total_cost_usd, budget)
    if verdict.within_budget:
        return verdict

    logger.warning(
        "Test budget exceeded: $%.4f > $%.4f limit",
        cost_summary.total_cost_usd,
        budget,
    )
    return verdict

check_suite(cost_summaries)

Check a suite's total cost against the suite budget.

Parameters:

Name Type Description Default
cost_summaries list[CostSummary]

Cost summaries for all tests in the suite.

required

Returns:

Type Description
BudgetCheckResult | None

A BudgetCheckResult if a suite budget is configured, else None.

Source code in src/agentprobe/cost/budget.py
def check_suite(self, cost_summaries: list[CostSummary]) -> BudgetCheckResult | None:
    """Evaluate the summed cost of a suite against the suite budget.

    Args:
        cost_summaries: Cost summaries for all tests in the suite.

    Returns:
        A BudgetCheckResult if a suite budget is configured, else None.
    """
    budget = self._suite_budget
    if budget is None:
        return None

    suite_total = sum(item.total_cost_usd for item in cost_summaries)
    verdict = self._check(suite_total, budget)
    if verdict.within_budget:
        return verdict

    logger.warning(
        "Suite budget exceeded: $%.4f > $%.4f limit",
        suite_total,
        budget,
    )
    return verdict