Observability
Every interaction with a Large Language Model (LLM) generates crucial data that can be monitored, analyzed, and optimized.
Our LLM Gateway provides real-time tracking, security, and performance insights, acting as a centralized observability layer to streamline LLM interactions.
A gateway is the ultimate solution for managing, analyzing, and securing LLM traffic. By routing all requests through our LLM gateway, you gain:
📊 Key Metrics We Track
Every LLM request logs critical telemetry data, including:
- LLM Provider – Identify the AI service in use (e.g., OpenAI, Anthropic, Google Gemini).
- Model Version – Track which model (GPT-4, Claude, Gemini) is processing requests.
- Prompt Data – Log input prompts to analyze patterns & improve outputs.
- Response Data – Capture AI-generated outputs for debugging & quality control.
- Token Usage Metrics – Measure input/output token consumption to optimize performance.
- User Identity – Associate API usage with specific users for accountability.
- API Key Tracking – Monitor & secure API access to prevent unauthorized use.
- Authentication Tokens – Ensure session integrity & compliance.
- Connected User Sessions – Identify active users interacting with the model.
Unlike traditional monitoring tools, a gateway provides full-stack observability by capturing every LLM request before it reaches the provider.
This enables granular control, cost efficiency, and real-time insights.
Using data exporter
you can use otoroshi data exporters to extract LLM usage informations and send it to anything you like
just make sure to filter events on
{
"include": [{
"audit": "LLMUsageAudit"
}],
"exclude": []
}
LLMUsageAudit event
{
"@id": "1904913942551986550",
"@timestamp": 1743001850151,
"@type": "AuditEvent",
"@product": "otoroshi",
"@serviceId": "",
"@service": "Otoroshi",
"@env": "dev",
"audit": "LLMUsageAudit",
"provider_kind": "openai",
"provider": "provider_f98538b5-6d59-426c-8127-cb583a9fa763",
"duration": 960,
"model": "gpt-4o-mini",
"rate_limit": {
"requests_limit": 10000,
"requests_remaining": 9998,
"tokens_limit": 200000,
"tokens_remaining": 199992
},
"usage": {
"prompt_tokens": 14,
"generation_tokens": 39,
"reasoning_tokens": 0
},
"error": null,
"consumed_using": "chat/completion/blocking",
"user": null,
"apikey": {
"_loc": {
"tenant": "default",
"teams": [
"default"
]
},
"clientId": "the_client_id",
"clientName": "default-apikey",
"description": "the default apikey",
"authorizedGroup": "default",
"authorizedEntities": [
"group_default"
],
"authorizations": [
{
"kind": "group",
"id": "default"
}
],
"enabled": true,
"readOnly": false,
"allowClientIdOnly": false,
"throttlingQuota": 1,
"dailyQuota": 100000,
"monthlyQuota": 10000000,
"constrainedServicesOnly": false,
"restrictions": {
"enabled": false,
"allowLast": true,
"allowed": [],
"forbidden": [],
"notFound": []
},
"rotation": {
"enabled": false,
"rotationEvery": 744,
"gracePeriod": 168,
"nextSecret": null
},
"validUntil": null,
"tags": [],
"metadata": {
"updated_at": "2023-10-05T10:45:39.082+02:00",
"llm_tokens_limit": "201",
"llm_tokens_reset_after": "15000"
}
},
"route": {
"_loc": {
"tenant": "default",
"teams": [
"default"
]
},
"id": "route_ec4670a82-2ada-485a-955a-bb710a1d237c",
"name": "demo-llm-events",
"description": "A new route",
"tags": [],
"metadata": {
"created_at": "2025-03-26T16:04:40.661+01:00",
"updated_at": "2025-03-26T16:05:09.563+01:00"
},
"enabled": true,
"debug_flow": false,
"export_reporting": false,
"capture": false,
"groups": [
"default"
],
"bound_listeners": [],
"frontend": {
"domains": [
"demo-llm-events.oto.tools"
],
"strip_path": true,
"exact": false,
"headers": {},
"query": {},
"methods": []
},
"backend": {
"targets": [
{
"id": "target_1",
"hostname": "request.otoroshi.io",
"port": 443,
"tls": true,
"weight": 1,
"backup": false,
"predicate": {
"type": "AlwaysMatch"
},
"protocol": "HTTP/1.1",
"ip_address": null,
"tls_config": {
"certs": [],
"trusted_certs": [],
"enabled": false,
"loose": false,
"trust_all": false
}
}
],
"root": "/",
"rewrite": false,
"load_balancing": {
"type": "RoundRobin"
},
"client": {
"retries": 1,
"max_errors": 20,
"retry_initial_delay": 50,
"backoff_factor": 2,
"call_timeout": 30000,
"call_and_stream_timeout": 120000,
"connection_timeout": 10000,
"idle_timeout": 60000,
"global_timeout": 30000,
"sample_interval": 2000,
"proxy": {},
"custom_timeouts": [],
"cache_connection_settings": {
"enabled": false,
"queue_size": 2048
}
},
"health_check": {
"enabled": false,
"url": "",
"timeout": 5000,
"healthyStatuses": [],
"unhealthyStatuses": []
}
},
"backend_ref": null,
"plugins": [
{
"enabled": true,
"debug": false,
"plugin": "cp:otoroshi.next.plugins.OverrideHost",
"include": [],
"exclude": [],
"config": {},
"bound_listeners": [],
"plugin_index": {
"transform_request": 0
}
},
{
"enabled": true,
"debug": false,
"plugin": "cp:otoroshi_plugins.com.cloud.apim.otoroshi.extensions.aigateway.plugins.OpenAiCompatProxy",
"include": [],
"exclude": [],
"config": {
"refs": [
"provider_f98538b5-6d59-426c-8127-cb583a9fa763"
]
},
"bound_listeners": [],
"plugin_index": {}
},
{
"enabled": true,
"debug": false,
"plugin": "cp:otoroshi.next.plugins.ApikeyCalls",
"include": [],
"exclude": [],
"config": {
"extractors": {
"basic": {
"enabled": true,
"header_name": null,
"query_name": null
},
"custom_headers": {
"enabled": true,
"client_id_header_name": null,
"client_secret_header_name": null
},
"client_id": {
"enabled": true,
"header_name": null,
"query_name": null
},
"jwt": {
"enabled": true,
"secret_signed": true,
"keypair_signed": true,
"include_request_attrs": false,
"max_jwt_lifespan_sec": null,
"header_name": null,
"query_name": null,
"cookie_name": null
}
},
"routing": {
"enabled": false
},
"validate": true,
"mandatory": true,
"pass_with_user": false,
"wipe_backend_request": true,
"update_quotas": true
},
"bound_listeners": [],
"plugin_index": {
"validate_access": 0,
"transform_request": 1,
"match_route": 0
}
}
]
},
"input_prompt": [
{
"role": "user",
"content": "tell me a joke"
}
],
"output": {
"generations": [
{
"message": {
"role": "assistant",
"content": "Why don't skeletons fight each other?\n\nThey don't have the guts! 😄"
}
}
],
"metadata": {
"rate_limit": {
"requests_limit": 10000,
"requests_remaining": 9998,
"tokens_limit": 200000,
"tokens_remaining": 199992
},
"usage": {
"prompt_tokens": 14,
"generation_tokens": 39,
"reasoning_tokens": 0
}
}
},
"provider_details": {
"_loc": {
"tenant": "default",
"teams": [
"default"
]
},
"id": "provider_f98538b5-6d59-426c-8127-cb583a9fa763",
"name": "OpenAI provider",
"description": "An OpenAI LLM api provider",
"metadata": {
"created_at": "2024-07-26T09:43:41.850+02:00",
"updated_at": "2025-02-28T12:06:21.250+01:00"
},
"tags": [],
"provider": "openai",
"connection": {
"base_url": "https://api.openai.com",
"token": "${vault://local/openai-token}",
"timeout": 30000
},
"options": {
"model": "gpt-4o-mini",
"frequency_penalty": null,
"logit_bias": null,
"logprobs": null,
"top_logprobs": null,
"max_tokens": "10000",
"n": 1,
"presence_penalty": null,
"response_format": null,
"seed": null,
"stop": null,
"stream": false,
"temperature": 1,
"top_p": 1,
"tools": null,
"tool_choice": null,
"user": null,
"wasm_tools": [],
"mcp_connectors": [],
"allow_config_override": false
},
"provider_fallback": null,
"context": {
"default": null,
"contexts": []
},
"models": {
"include": [],
"exclude": []
},
"guardrails": [],
"guardrails_fail_on_deny": false,
"cache": {
"strategy": "none",
"ttl": 86400000,
"score": 0.8
}
},
"user-agent-details": null,
"origin-details": null,
"instance-number": 0,
"instance-name": "dev",
"instance-zone": "local",
"instance-region": "local",
"instance-dc": "local",
"instance-provider": "local",
"instance-rack": "local",
"cluster-mode": "Leader",
"cluster-name": "otoroshi-leader-dev"
}
Dashboarding
We can use LLMUsageAudit
events to build a dashboard.
For example, we built a Grafana Dashboard to display some tokens consumption and other metrics.