Spaces:

neural-thinker
/

cidadao.ai-backend

Paused

cidadao.ai-backend / monitoring /grafana /dashboards /cidadao-ai-agents.json

anderson-ufrj

feat: implement comprehensive monitoring and observability stack

c97e35f 4 months ago

9.34 kB

	{
	"dashboard": {
	"id": null,
	"title": "Cidadão.AI - Agent Performance",
	"tags": ["cidadao-ai", "agents"],
	"style": "dark",
	"timezone": "browser",
	"editable": true,
	"hideControls": false,
	"graphTooltip": 1,
	"time": {
	"from": "now-1h",
	"to": "now"
	},
	"timepicker": {
	"refresh_intervals": ["5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d"],
	"time_options": ["5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d"]
	},
	"refresh": "30s",
	"panels": [
	{
		"id": 1,
		"type": "stat",
		"title": "Agent Task Success Rate",
		"gridPos": {"h": 4, "w": 6, "x": 0, "y": 0},
		"targets": [
			{
				"expr": "sum(rate(cidadao_ai_agent_tasks_total{status=\"success\"}[5m])) / sum(rate(cidadao_ai_agent_tasks_total[5m])) * 100",
				"legendFormat": "Overall Success Rate"
			}
		],
		"fieldConfig": {
			"defaults": {
				"unit": "percent",
				"thresholds": {
					"steps": [
						{
							"color": "red",
							"value": 0
						},
						{
							"color": "yellow",
							"value": 90
						},
						{
							"color": "green",
							"value": 95
						}
					]
				}
			}
		}
	},
	{
		"id": 2,
		"type": "stat",
		"title": "Active Agents",
		"gridPos": {"h": 4, "w": 6, "x": 6, "y": 0},
		"targets": [
			{
				"expr": "count(count by (agent_name) (cidadao_ai_agent_tasks_total))",
				"legendFormat": "Active Agents"
			}
		]
	},
	{
		"id": 3,
		"title": "Average Task Duration",
		"type": "stat",
		"targets": [
			{
				"expr": "sum(rate(cidadao_ai_agent_task_duration_seconds_sum[5m])) / sum(rate(cidadao_ai_agent_task_duration_seconds_count[5m]))",
				"legendFormat": "Avg Duration"
			}
		],
		"gridPos": {
			"h": 4,
			"w": 6,
			"x": 12,
			"y": 0
		},
		"fieldConfig": {
			"defaults": {
				"unit": "s",
				"thresholds": {
					"steps": [
						{"color": "green", "value": 0},
						{"color": "yellow", "value": 5},
						{"color": "red", "value": 10}
					]
				}
			}
		}
	},
	{
		"id": 4,
		"type": "stat",
		"title": "Reflection Iterations",
		"gridPos": {"h": 4, "w": 6, "x": 18, "y": 0},
		"targets": [
			{
				"expr": "avg(cidadao_ai_agent_reflection_iterations)",
				"legendFormat": "Avg Reflections"
			}
		]
	},
	{
		"id": 5,
		"type": "graph",
		"title": "Agent Performance by Type",
		"gridPos": {"h": 8, "w": 12, "x": 0, "y": 4},
		"legend": {
			"show": true,
			"values": true,
			"current": true,
			"alignAsTable": true
		},
		"targets": [
			{
				"expr": "rate(cidadao_ai_agent_tasks_total{status=\"success\"}[5m])",
				"legendFormat": "{{agent_name}} - Success"
			},
			{
				"expr": "rate(cidadao_ai_agent_tasks_total{status=\"error\"}[5m])",
				"legendFormat": "{{agent_name}} - Error"
			}
		]
	},
	{
		"id": 6,
		"title": "Task Duration by Agent",
		"type": "graph",
		"targets": [
			{
				"expr": "histogram_quantile(0.95, sum by (le, agent_name) (rate(cidadao_ai_agent_task_duration_seconds_bucket[5m])))",
				"legendFormat": "{{agent_name}} - P95"
			},
			{
				"expr": "histogram_quantile(0.50, sum by (le, agent_name) (rate(cidadao_ai_agent_task_duration_seconds_bucket[5m])))",
				"legendFormat": "{{agent_name}} - P50"
			}
		],
		"gridPos": {
			"h": 8,
			"w": 12,
			"x": 12,
			"y": 4
		},
		"yaxes": [
			{
				"format": "s",
				"label": "Duration (seconds)",
				"logBase": 1,
				"max": null,
				"min": 0,
				"show": true
			},
			{
				"format": "short",
				"label": null,
				"logBase": 1,
				"max": null,
				"min": null,
				"show": false
			}
		]
	},
	{
		"id": 7,
		"type": "piechart",
		"title": "Agent Status Distribution",
		"gridPos": {"h": 8, "w": 8, "x": 0, "y": 12},
		"targets": [
			{
				"expr": "sum by (status) (cidadao_ai_agent_tasks_total)",
				"legendFormat": "{{status}}"
			}
		]
	},
	{
		"id": 8,
		"title": "Top Performing Agents",
		"type": "table",
		"targets": [
			{
				"expr": "topk(10, sum by (agent_name) (increase(cidadao_ai_agent_tasks_total{status=\"success\"}[1h])))",
				"legendFormat": "{{agent_name}}",
				"format": "table",
				"instant": true
			}
		],
		"gridPos": {
			"h": 8,
			"w": 8,
			"x": 8,
			"y": 12
		},
		"transformations": [
			{
				"id": "organize",
				"options": {
					"excludeByName": {
						"Time": true
					},
					"renameByName": {
						"agent_name": "Agent",
						"Value": "Tasks/hour"
					}
				}
			}
		]
	},
	{
		"id": 9,
		"title": "Agent Error Distribution",
		"type": "table",
		"targets": [
			{
				"expr": "sum by (agent_name, error_type) (cidadao_ai_agent_errors_total)",
				"legendFormat": "{{agent_name}} - {{error_type}}",
				"format": "table",
				"instant": true
			}
		],
		"gridPos": {
			"h": 8,
			"w": 8,
			"x": 16,
			"y": 12
		}
	},
	{
		"id": 10,
		"type": "graph",
		"title": "Zumbi Agent - Anomaly Detection",
		"gridPos": {"h": 6, "w": 12, "x": 0, "y": 20},
		"targets": [
			{
				"expr": "rate(cidadao_ai_anomalies_detected_total{agent=\"zumbi\"}[5m])",
				"legendFormat": "{{anomaly_type}}"
			}
		]
	},
	{
		"id": 11,
		"type": "stat",
		"title": "Anita Agent - Analysis Accuracy",
		"gridPos": {"h": 6, "w": 6, "x": 12, "y": 20},
		"targets": [
			{
				"expr": "avg(cidadao_ai_analysis_accuracy{agent=\"anita\"})",
				"legendFormat": "Analysis Accuracy"
			}
		],
		"fieldConfig": {
			"defaults": {"unit": "percent", "min": 0, "max": 100}
		}
	},
	{
		"id": 12,
		"title": "Tiradentes Agent - Report Generation",
		"type": "stat",
		"targets": [
			{
				"expr": "sum(increase(cidadao_ai_reports_generated_total{agent=\"tiradentes\"}[1h]))",
				"legendFormat": "Reports/hour"
			}
		],
		"gridPos": {
			"h": 6,
			"w": 6,
			"x": 18,
			"y": 20
		}
	},
	{
		"id": 13,
		"title": "Agent Memory Usage",
		"type": "graph",
		"targets": [
			{
				"expr": "cidadao_ai_agent_memory_usage_bytes",
				"legendFormat": "{{agent_name}}"
			}
		],
		"gridPos": {
			"h": 6,
			"w": 12,
			"x": 0,
			"y": 26
		},
		"yaxes": [
			{
				"format": "bytes",
				"label": "Memory (bytes)",
				"logBase": 1,
				"max": null,
				"min": 0,
				"show": true
			},
			{
				"format": "short",
				"label": null,
				"logBase": 1,
				"max": null,
				"min": null,
				"show": false
			}
		]
	},
	{
		"id": 14,
		"type": "heatmap",
		"title": "Agent Communication Matrix",
		"gridPos": {"h": 6, "w": 12, "x": 12, "y": 26},
		"targets": [
			{
				"expr": "sum by (source_agent, target_agent) (cidadao_ai_agent_messages_total)",
				"legendFormat": "{{source_agent}} -> {{target_agent}}"
			}
		]
	},
	{
		"id": 15,
		"title": "Quality Score Trends",
		"type": "graph",
		"targets": [
			{
				"refId": "A",
				"expr": "avg_over_time(cidadao_ai_agent_quality_score[30m])",
				"legendFormat": "{{agent_name}}"
			}
		],
		"gridPos": {
			"h": 6,
			"w": 24,
			"x": 0,
			"y": 32
		},
		"yaxes": [
			{
				"format": "short",
				"label": "Quality Score",
				"logBase": 1,
				"max": 1,
				"min": 0,
				"show": true
			},
			{
				"format": "short",
				"label": null,
				"logBase": 1,
				"max": null,
				"min": null,
				"show": false
			}
		],
		"alert": {
			"conditions": [
				{
					"evaluator": {
						"params": [0.8],
						"type": "lt"
					},
					"operator": {
						"type": "and"
					},
					"query": {
						"params": ["A", "5m", "now"]
					},
					"reducer": {
						"params": [],
						"type": "avg"
					},
					"type": "query"
				}
			],
			"executionErrorState": "alerting",
			"for": "3m",
			"frequency": "30s",
			"handler": 1,
			"name": "Agent Quality Score Below Threshold",
			"noDataState": "no_data",
			"notifications": []
		}
	}
	]
	}
	}