💰 Cost tracking

Cost tracking for LLMs with a gateway means monitoring and managing the costs of using different LLMs through an API gateway.

Our Otoroshi LLM extension helps you optimize usage, control your budget, and improve cost efficiency across models.

You can track the cost of each request and generate reports for each model.

If you want to track the costs of your LLM usage, you can enable it in the Otoroshi LLM Extension (it should be enabled by default).

Configuration

costs-tracking {
  enabled = true
  enabled = ${?CLOUD_APIM_EXTENSIONS_LLM_EXTENSION_COSTS_TRACKING_ENABLED}
  embed-costs-tracking-in-responses = false
  embed-costs-tracking-in-responses = ${?CLOUD_APIM_EXTENSIONS_LLM_EXTENSION_COSTS_TRACKING_EMBED_COSTS_TRACKING_IN_RESPONSES}
}

Once it's enabled, audit events of kind LLMUsageAudit will have a costs object.

You can also embed the costs value in your LLM responses using the costs-tracking.embed-costs-tracking-in-responses config.

The costs tracking computation is based on the LiteLLM project's model price dictionary and currently supports only the following providers:

openai
deepseek
x-ai
azure-openai
cloudflare
gemini
mistral 🇫🇷 🇪🇺
ollama
cohere
anthropic
groq

However, if you're okay with approximations, you can set the following metadata on your providers so that costs are computed using a supported provider / model:

costs-tracking-provider: the provider used for costs tracking computation
costs-tracking-model: the model used for costs tracking computation

Example of costs tracking embed in responses

NOTE: you can embed costs tracking information by using the embed_costs=true query parameter

$ curl --request POST \
  --url 'http://test.oto.tools:9999/v1/chat/completions?embed_costs=true' \
  --header 'authorization: Bearer otoapk_mqXJ9YrgVM0rcGZy_0a35ab6e5b5407cc7200f94f43f60c583928d372ef43b99a28b93243c3c90153' \
  --header 'content-type: application/json' \
  --data '{
  "messages": [
    {
      "role": "user",
      "content": "tell me a joke"
    }
  ]
}'

Response from the LLM:

{
  "id": "chatcmpl-VRyJP4WKPFG2bWODCKp0yXn3UkwtpdnQ",
  "object": "chat.completion",
  "created": 1743169375,
  "model": "gpt-4o-mini",
  "system_fingerprint": "fp-CGPX1MTbpRo7OvGCR0MPPwZiXz9sm8N0",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 11,
    "completion_tokens": 18,
    "total_tokens": 29,
    "completion_tokens_details": {
      "reasoning_tokens": 0
    }
  },
  "costs": {
    "input_cost": 0.00000165,
    "output_cost": 0.0000108,
    "reasoning_cost": 0,
    "total_cost": 0.00001245,
    "currency": "dollar"
  }
}

Example of LLMUsageAudit event with cost tracking

{
  "@id" : "1905616593920983819",
  "@timestamp" : 1743169375292,
  "@type" : "AuditEvent",
  "@product" : "otoroshi",
  "@serviceId" : "",
  "@service" : "Otoroshi",
  "@env" : "dev",
  "audit" : "LLMUsageAudit",
  "provider_kind" : "openai",
  "provider" : "provider_10bbc76d-7cd8-4cb7-b760-61e749a1b691",
  "duration" : 415,
  "model" : "gpt-4o-mini",
  "rate_limit" : {
    "requests_limit" : 10000,
    "requests_remaining" : 9999,
    "tokens_limit" : 200000,
    "tokens_remaining" : 199993
  },
  "usage" : {
    "prompt_tokens" : 11,
    "generation_tokens" : 18,
    "reasoning_tokens" : 0
  },
  "error" : null,
  "consumed_using" : "chat/completion/blocking",
  "user" : null,
  "apikey" : null,
  "route" : {
    "_loc" : {
      "tenant" : "default",
      "teams" : [ "default" ]
    },
    "id" : "route_e4a9d6cb3-d859-4203-a860-8d1dd6d09557",
    "name" : "test",
    "description" : "A new route",
    "tags" : [ ],
    "metadata" : {
      "created_at" : "2025-03-28T10:10:19.448+01:00",
      "updated_at" : "2025-03-28T10:48:48.218+01:00"
    },
    "enabled" : true,
    "debug_flow" : false,
    "export_reporting" : false,
    "capture" : false,
    "groups" : [ "default" ],
    "bound_listeners" : [ ],
    "frontend" : {
      "domains" : [ "test.oto.tools" ],
      "strip_path" : true,
      "exact" : false,
      "headers" : { },
      "query" : { },
      "methods" : [ ]
    },
    "backend" : {
      "targets" : [ {
        "id" : "target_1",
        "hostname" : "request.otoroshi.io",
        "port" : 443,
        "tls" : true,
        "weight" : 1,
        "backup" : false,
        "predicate" : {
          "type" : "AlwaysMatch"
        },
        "protocol" : "HTTP/1.1",
        "ip_address" : null,
        "tls_config" : {
          "certs" : [ ],
          "trusted_certs" : [ ],
          "enabled" : false,
          "loose" : false,
          "trust_all" : false
        }
      } ],
      "root" : "/",
      "rewrite" : false,
      "load_balancing" : {
        "type" : "RoundRobin"
      },
      "client" : {
        "retries" : 1,
        "max_errors" : 20,
        "retry_initial_delay" : 50,
        "backoff_factor" : 2,
        "call_timeout" : 30000,
        "call_and_stream_timeout" : 120000,
        "connection_timeout" : 10000,
        "idle_timeout" : 60000,
        "global_timeout" : 30000,
        "sample_interval" : 2000,
        "proxy" : { },
        "custom_timeouts" : [ ],
        "cache_connection_settings" : {
          "enabled" : false,
          "queue_size" : 2048
        }
      },
      "health_check" : {
        "enabled" : false,
        "url" : "",
        "timeout" : 5000,
        "healthyStatuses" : [ ],
        "unhealthyStatuses" : [ ]
      }
    },
    "backend_ref" : null,
    "plugins" : [ {
      "enabled" : true,
      "debug" : false,
      "plugin" : "cp:otoroshi.next.plugins.OverrideHost",
      "include" : [ ],
      "exclude" : [ ],
      "config" : { },
      "bound_listeners" : [ ],
      "plugin_index" : {
        "transform_request" : 0
      }
    }, {
      "enabled" : true,
      "debug" : false,
      "plugin" : "cp:otoroshi_plugins.com.cloud.apim.otoroshi.extensions.aigateway.plugins.OpenAiCompatProxy",
      "include" : [ ],
      "exclude" : [ ],
      "config" : {
        "refs" : [ "provider_10bbc76d-7cd8-4cb7-b760-61e749a1b691" ]
      },
      "bound_listeners" : [ ],
      "plugin_index" : { }
    } ]
  },
  "input_prompt" : [ {
    "role" : "user",
    "content" : "tell me a joke"
  } ],
  "output" : {
    "generations" : [ {
      "message" : {
        "role" : "assistant",
        "content" : "Why did the scarecrow win an award?\n\nBecause he was outstanding in his field!"
      }
    } ],
    "metadata" : {
      "rate_limit" : {
        "requests_limit" : 10000,
        "requests_remaining" : 9999,
        "tokens_limit" : 200000,
        "tokens_remaining" : 199993
      },
      "usage" : {
        "prompt_tokens" : 11,
        "generation_tokens" : 18,
        "reasoning_tokens" : 0
      },
      "costs" : {
        "input_cost" : 0.00000165,
        "output_cost" : 0.0000108,
        "reasoning_cost" : 0,
        "total_cost" : 0.00001245,
        "currency" : "dollar"
      }
    }
  },
  "provider_details" : {
    "_loc" : {
      "tenant" : "default",
      "teams" : [ "default" ]
    },
    "id" : "provider_10bbc76d-7cd8-4cb7-b760-61e749a1b691",
    "name" : "OpenAI clean",
    "description" : "An OpenAI LLM api provider",
    "metadata" : {
      "created_at" : "2025-03-28T10:10:51.558+01:00"
    },
    "tags" : [ ],
    "provider" : "openai",
    "connection" : {
      "base_url" : "https://api.openai.com/v1",
      "token" : "xxx",
      "timeout" : 30000
    },
    "options" : {
      "model" : "gpt-4o-mini",
      "frequency_penalty" : null,
      "logit_bias" : null,
      "logprobs" : null,
      "top_logprobs" : null,
      "max_tokens" : null,
      "n" : 1,
      "presence_penalty" : null,
      "response_format" : null,
      "seed" : null,
      "stop" : null,
      "stream" : false,
      "temperature" : 1,
      "top_p" : 1,
      "tools" : null,
      "tool_choice" : null,
      "user" : null,
      "wasm_tools" : [ ],
      "mcp_connectors" : [ ],
      "allow_config_override" : true
    },
    "provider_fallback" : null,
    "context" : {
      "default" : null,
      "contexts" : [ ]
    },
    "models" : {
      "include" : [ ],
      "exclude" : [ ]
    },
    "guardrails" : [ ],
    "guardrails_fail_on_deny" : false,
    "cache" : {
      "strategy" : "none",
      "ttl" : 300000,
      "score" : 0.8
    }
  },
  "costs": {
    "input_cost": 0.00000165,
    "output_cost": 0.0000108,
    "reasoning_cost": 0,
    "total_cost": 0.00001245,
    "currency": "dollar"
  }
}

💰 Cost tracking

Configuration

Example of costs tracking embed in responses

Example of LLMUsageAudit event with cost tracking

Dashboard example

Configuration

Example of costs tracking embed in responses

Example of LLMUsageAudit event with cost tracking

Dashboard example