
Quickstart

Send a first request to a cloud provider:

import asyncio

from mada_modelkit import AgentRequest, AgentResponse

# Import a provider
from mada_modelkit.providers.cloud.openai import OpenAIClient

async def main():
    async with OpenAIClient(
        api_key="sk-...",
        model="gpt-4o",
    ) as client:
        response = await client.send_request(
            AgentRequest(prompt="Explain TCP in one sentence.")
        )
        print(response.content)

asyncio.run(main())

Middleware wraps any client. Stack them in any order:

from mada_modelkit import (
    AgentRequest,
    RetryMiddleware,
    CircuitBreakerMiddleware,
    CachingMiddleware,
    TrackingMiddleware,
)
from mada_modelkit.providers.cloud.openai import OpenAIClient

# Build the stack: tracking → cache → circuit breaker → retry → provider
provider = OpenAIClient(api_key="sk-...", model="gpt-4o")
retry = RetryMiddleware(provider, max_retries=3, backoff_base=1.0)
circuit = CircuitBreakerMiddleware(retry, failure_threshold=5)
cache = CachingMiddleware(circuit, ttl=3600.0)
client = TrackingMiddleware(cache)

# Use it like any other client (inside an async function)
response = await client.send_request(AgentRequest(prompt="Hello"))

# Check aggregate stats collected by TrackingMiddleware
print(f"Requests: {client.stats.total_requests}")
print(f"Tokens: {client.stats.total_input_tokens + client.stats.total_output_tokens}")
Streaming works through the same stack. Each chunk carries a text delta:

from mada_modelkit import AgentRequest

async for chunk in client.send_request_stream(AgentRequest(prompt="Tell a story")):
    print(chunk.delta, end="", flush=True)
    if chunk.is_final:
        print()  # newline at end
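If you need the whole text after streaming finishes, a small variation on the loop above buffers the deltas. This assumes every chunk's delta is a string, which the example above suggests but does not state:

from mada_modelkit import AgentRequest

# Buffer the deltas, then join them into the full response text.
parts = []
async for chunk in client.send_request_stream(AgentRequest(prompt="Tell a story")):
    parts.append(chunk.delta)
full_text = "".join(parts)
print(full_text)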
To fail over between providers, wrap a primary client and an ordered list of fallbacks:

from mada_modelkit import AgentRequest, FallbackMiddleware
from mada_modelkit.providers.cloud.openai import OpenAIClient
from mada_modelkit.providers.cloud.anthropic import AnthropicClient

primary = OpenAIClient(api_key="sk-...", model="gpt-4o")
fallback = AnthropicClient(api_key="sk-ant-...", model="claude-sonnet-4-6")
client = FallbackMiddleware(primary, [fallback])

# If OpenAI fails, automatically tries Anthropic
response = await client.send_request(AgentRequest(prompt="Hello"))
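Fallback composes with the other middleware. One pattern (our sketch, not prescribed by the library) is to retry the primary a few times before falling back, reusing the RetryMiddleware signature from the stacking example:

from mada_modelkit import FallbackMiddleware, RetryMiddleware
from mada_modelkit.providers.cloud.openai import OpenAIClient
from mada_modelkit.providers.cloud.anthropic import AnthropicClient

# Retry OpenAI up to 3 times; only if that still fails, try Anthropic.
primary = RetryMiddleware(
    OpenAIClient(api_key="sk-...", model="gpt-4o"),
    max_retries=3,
    backoff_base=1.0,
)
client = FallbackMiddleware(
    primary,
    [AnthropicClient(api_key="sk-ant-...", model="claude-sonnet-4-6")],
)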
Local models use the same interface. To talk to a running Ollama server:

from mada_modelkit import AgentRequest
from mada_modelkit.providers.local_server.ollama import OllamaClient

async with OllamaClient(model="llama3.1") as client:
    response = await client.send_request(
        AgentRequest(prompt="What is WACP?")
    )
    print(response.content)
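Streaming should work against local clients too, assuming OllamaClient exposes the same send_request_stream method the middleware stack did above; that is an assumption on our part, not something this page confirms:

from mada_modelkit import AgentRequest
from mada_modelkit.providers.local_server.ollama import OllamaClient

async with OllamaClient(model="llama3.1") as client:
    # send_request_stream is assumed to exist on local clients as well.
    async for chunk in client.send_request_stream(AgentRequest(prompt="What is WACP?")):
        print(chunk.delta, end="", flush=True)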
To run a GGUF model in-process via llama.cpp:

from mada_modelkit import AgentRequest
from mada_modelkit.providers.native.llamacpp import LlamaCppClient

async with LlamaCppClient(model_path="/path/to/model.gguf") as client:
    response = await client.send_request(
        AgentRequest(prompt="Hello", max_tokens=100)
    )
    print(response.content)
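Because middleware wraps any client, the cloud-oriented stack above applies to local models unchanged. A minimal sketch, assuming CachingMiddleware accepts a native client just as it accepted the circuit breaker earlier:

from mada_modelkit import AgentRequest, CachingMiddleware
from mada_modelkit.providers.native.llamacpp import LlamaCppClient

async with LlamaCppClient(model_path="/path/to/model.gguf") as provider:
    client = CachingMiddleware(provider, ttl=3600.0)
    # The second identical request should be answered from the cache.
    for _ in range(2):
        response = await client.send_request(AgentRequest(prompt="Hello", max_tokens=100))
        print(response.content)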