File size: 4,017 Bytes
c7fed4d 8275699 c7fed4d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""
API routes for agent performance metrics.
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Response
from prometheus_client import CONTENT_TYPE_LATEST
from src.core import get_logger
from src.api.auth import User
from src.api.dependencies import get_current_user
from src.services.agent_metrics import agent_metrics_service
# Router that collects every agent-metrics endpoint declared in this module.
router = APIRouter()
# Module logger, obtained from the project's get_logger helper under the
# "api.agent_metrics" name.
logger = get_logger("api.agent_metrics")
@router.get("/agents/{agent_name}/stats")
async def get_agent_stats(
    agent_name: str,
    current_user: User = Depends(get_current_user)
):
    """Get detailed statistics for a specific agent.

    Args:
        agent_name: Name of the agent, taken from the URL path.
        current_user: Value produced by the ``get_current_user`` dependency.
            It is not read in the body; declaring it makes the dependency
            run for this route.

    Returns:
        A dict ``{"status": "success", "data": <stats>}`` where ``<stats>``
        is whatever ``agent_metrics_service.get_agent_stats`` returned.

    Raises:
        HTTPException: 404 when the service reports ``status == "no_data"``
            for the agent; 500 when any other error occurs.
    """
    try:
        stats = await agent_metrics_service.get_agent_stats(agent_name)
        if stats.get("status") == "no_data":
            raise HTTPException(
                status_code=404,
                detail=f"No metrics found for agent: {agent_name}"
            )
        return {
            "status": "success",
            "data": stats
        }
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the client as-is;
        # without this clause the generic handler below rewrote them as 500.
        raise
    except Exception as e:
        logger.error(f"Error getting agent stats: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/agents/summary")
async def get_all_agents_summary(
    current_user: User = Depends(get_current_user)
):
    """Return the summary produced by the metrics service for all agents.

    The service result is wrapped as ``{"status": "success", "data": ...}``.
    Any exception is logged and re-raised as an HTTP 500 whose detail is the
    exception text. ``current_user`` is not read in the body; it is declared
    so the ``get_current_user`` dependency runs for this route.
    """
    try:
        overview = await agent_metrics_service.get_all_agents_summary()
        return dict(status="success", data=overview)
    except Exception as exc:
        failure_text = str(exc)
        logger.error(f"Error getting agents summary: {exc}")
        raise HTTPException(status_code=500, detail=failure_text)
@router.get("/prometheus")
async def get_prometheus_metrics():
    """
    Serve the metrics payload in Prometheus exposition format.

    No authentication dependency is declared on this route, which allows a
    Prometheus server to scrape it directly. The payload comes from
    ``agent_metrics_service.get_prometheus_metrics()`` and is returned with
    the ``CONTENT_TYPE_LATEST`` content type. Any exception is logged and
    re-raised as an HTTP 500 whose detail is the exception text.
    """
    try:
        payload = agent_metrics_service.get_prometheus_metrics()
        # The content type is passed both as media_type and as an explicit
        # header, exactly as the endpoint has always done.
        explicit_headers = {"Content-Type": CONTENT_TYPE_LATEST}
        return Response(
            content=payload,
            media_type=CONTENT_TYPE_LATEST,
            headers=explicit_headers,
        )
    except Exception as exc:
        logger.error(f"Error generating Prometheus metrics: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/agents/{agent_name}/reset")
async def reset_agent_metrics(
    agent_name: str,
    current_user: User = Depends(get_current_user)
):
    """Reset the metrics of one agent, identified by the URL path.

    Calls ``agent_metrics_service.reset_metrics(agent_name)`` and returns a
    success dict with a confirmation message. Any exception is logged and
    re-raised as an HTTP 500 whose detail is the exception text.
    ``current_user`` is not read in the body; it is declared so the
    ``get_current_user`` dependency runs for this route.
    """
    try:
        await agent_metrics_service.reset_metrics(agent_name)
        confirmation = f"Metrics reset for agent: {agent_name}"
        return dict(status="success", message=confirmation)
    except Exception as exc:
        logger.error(f"Error resetting agent metrics: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.post("/reset")
async def reset_all_metrics(
    current_user: User = Depends(get_current_user)
):
    """Reset the metrics of every agent.

    Calls ``agent_metrics_service.reset_metrics()`` with no arguments and
    returns a success dict with a confirmation message. Any exception is
    logged and re-raised as an HTTP 500 whose detail is the exception text.
    ``current_user`` is not read in the body; it is declared so the
    ``get_current_user`` dependency runs for this route.
    """
    try:
        await agent_metrics_service.reset_metrics()
        return dict(
            status="success",
            message="All agent metrics have been reset",
        )
    except Exception as exc:
        logger.error(f"Error resetting all metrics: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@router.get("/health")
async def metrics_health_check():
    """Report whether the metrics service responds.

    Fetches the all-agents summary as a liveness probe. On success the
    response carries ``"status": "healthy"`` plus the ``total_agents`` and
    ``total_requests`` values from the summary (0 when a key is absent).
    On any exception the error is logged and an ``"unhealthy"`` dict with
    the exception text is returned instead of raising.
    """
    service_name = "agent_metrics"
    try:
        # Fetching the summary doubles as the check that the service works.
        overview = await agent_metrics_service.get_all_agents_summary()
        agents_tracked = overview.get("total_agents", 0)
        total_requests = overview.get("total_requests", 0)
        return {
            "status": "healthy",
            "service": service_name,
            "agents_tracked": agents_tracked,
            "total_requests": total_requests,
        }
    except Exception as exc:
        logger.error(f"Metrics service health check failed: {exc}")
        return {
            "status": "unhealthy",
            "service": service_name,
            "error": str(exc),
        }