anderson-ufrj
commited on
Commit
·
c78f128
1
Parent(s):
e9b5c2a
feat(agents): integrate dados.gov.br tool into Zumbi agent
Browse files- Add DadosGovTool integration for open data enrichment
- Create _enrich_with_open_data method to search related datasets
- Add enable_open_data_enrichment option to InvestigationRequest
- Include open_data_enrichment in agent capabilities
- Enrich contract data with available open datasets references
- src/agents/zumbi.py +76 -1
src/agents/zumbi.py
CHANGED
|
@@ -26,6 +26,7 @@ from src.core.monitoring import (
|
|
| 26 |
from src.tools.transparency_api import TransparencyAPIClient, TransparencyAPIFilter
|
| 27 |
from src.tools.models_client import ModelsClient, get_models_client
|
| 28 |
from src.ml.spectral_analyzer import SpectralAnalyzer, SpectralAnomaly
|
|
|
|
| 29 |
import time
|
| 30 |
|
| 31 |
|
|
@@ -53,6 +54,7 @@ class InvestigationRequest(BaseModel):
|
|
| 53 |
value_threshold: Optional[float] = PydanticField(default=None, description="Minimum value threshold for contracts")
|
| 54 |
anomaly_types: Optional[List[str]] = PydanticField(default=None, description="Specific types of anomalies to look for")
|
| 55 |
max_records: int = PydanticField(default=100, description="Maximum records to analyze")
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
class InvestigatorAgent(BaseAgent):
|
|
@@ -92,7 +94,8 @@ class InvestigatorAgent(BaseAgent):
|
|
| 92 |
"duplicate_contract_detection",
|
| 93 |
"payment_pattern_analysis",
|
| 94 |
"spectral_analysis",
|
| 95 |
-
"explainable_ai"
|
|
|
|
| 96 |
],
|
| 97 |
max_retries=3,
|
| 98 |
timeout=60
|
|
@@ -112,6 +115,9 @@ class InvestigatorAgent(BaseAgent):
|
|
| 112 |
# Initialize spectral analyzer for frequency-domain analysis (fallback)
|
| 113 |
self.spectral_analyzer = SpectralAnalyzer()
|
| 114 |
|
|
|
|
|
|
|
|
|
|
| 115 |
# Anomaly detection methods registry
|
| 116 |
self.anomaly_detectors = {
|
| 117 |
"price_anomaly": self._detect_price_anomalies,
|
|
@@ -202,6 +208,13 @@ class InvestigatorAgent(BaseAgent):
|
|
| 202 |
metadata={"investigation_id": context.investigation_id}
|
| 203 |
)
|
| 204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
# Run anomaly detection
|
| 206 |
anomalies = await self._run_anomaly_detection(
|
| 207 |
contracts_data,
|
|
@@ -371,6 +384,68 @@ class InvestigatorAgent(BaseAgent):
|
|
| 371 |
|
| 372 |
return all_contracts[:request.max_records]
|
| 373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
async def _run_anomaly_detection(
|
| 375 |
self,
|
| 376 |
contracts_data: List[Dict[str, Any]],
|
|
|
|
| 26 |
from src.tools.transparency_api import TransparencyAPIClient, TransparencyAPIFilter
|
| 27 |
from src.tools.models_client import ModelsClient, get_models_client
|
| 28 |
from src.ml.spectral_analyzer import SpectralAnalyzer, SpectralAnomaly
|
| 29 |
+
from src.tools.dados_gov_tool import DadosGovTool
|
| 30 |
import time
|
| 31 |
|
| 32 |
|
|
|
|
| 54 |
value_threshold: Optional[float] = PydanticField(default=None, description="Minimum value threshold for contracts")
|
| 55 |
anomaly_types: Optional[List[str]] = PydanticField(default=None, description="Specific types of anomalies to look for")
|
| 56 |
max_records: int = PydanticField(default=100, description="Maximum records to analyze")
|
| 57 |
+
enable_open_data_enrichment: bool = PydanticField(default=True, description="Enable enrichment with dados.gov.br open data")
|
| 58 |
|
| 59 |
|
| 60 |
class InvestigatorAgent(BaseAgent):
|
|
|
|
| 94 |
"duplicate_contract_detection",
|
| 95 |
"payment_pattern_analysis",
|
| 96 |
"spectral_analysis",
|
| 97 |
+
"explainable_ai",
|
| 98 |
+
"open_data_enrichment"
|
| 99 |
],
|
| 100 |
max_retries=3,
|
| 101 |
timeout=60
|
|
|
|
| 115 |
# Initialize spectral analyzer for frequency-domain analysis (fallback)
|
| 116 |
self.spectral_analyzer = SpectralAnalyzer()
|
| 117 |
|
| 118 |
+
# Initialize dados.gov.br tool for accessing open data
|
| 119 |
+
self.dados_gov_tool = DadosGovTool()
|
| 120 |
+
|
| 121 |
# Anomaly detection methods registry
|
| 122 |
self.anomaly_detectors = {
|
| 123 |
"price_anomaly": self._detect_price_anomalies,
|
|
|
|
| 208 |
metadata={"investigation_id": context.investigation_id}
|
| 209 |
)
|
| 210 |
|
| 211 |
+
# Enrich data with open data information if available
|
| 212 |
+
if request.enable_open_data_enrichment:
|
| 213 |
+
contracts_data = await self._enrich_with_open_data(
|
| 214 |
+
contracts_data,
|
| 215 |
+
context
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
# Run anomaly detection
|
| 219 |
anomalies = await self._run_anomaly_detection(
|
| 220 |
contracts_data,
|
|
|
|
| 384 |
|
| 385 |
return all_contracts[:request.max_records]
|
| 386 |
|
| 387 |
+
async def _enrich_with_open_data(
|
| 388 |
+
self,
|
| 389 |
+
contracts_data: List[Dict[str, Any]],
|
| 390 |
+
context: AgentContext
|
| 391 |
+
) -> List[Dict[str, Any]]:
|
| 392 |
+
"""
|
| 393 |
+
Enrich contract data with information from dados.gov.br.
|
| 394 |
+
|
| 395 |
+
Args:
|
| 396 |
+
contracts_data: Contract records
|
| 397 |
+
context: Agent context
|
| 398 |
+
|
| 399 |
+
Returns:
|
| 400 |
+
Enriched contract data
|
| 401 |
+
"""
|
| 402 |
+
# Extract unique organizations from contracts
|
| 403 |
+
organizations = set()
|
| 404 |
+
for contract in contracts_data:
|
| 405 |
+
org_name = contract.get("orgao", {}).get("nome", "")
|
| 406 |
+
if org_name:
|
| 407 |
+
organizations.add(org_name)
|
| 408 |
+
|
| 409 |
+
# Search for related datasets for each organization
|
| 410 |
+
related_datasets = {}
|
| 411 |
+
for org_name in organizations:
|
| 412 |
+
try:
|
| 413 |
+
# Search for datasets from this organization
|
| 414 |
+
result = await self.dados_gov_tool._execute(
|
| 415 |
+
query=f"{org_name}, licitações, contratos",
|
| 416 |
+
action="search",
|
| 417 |
+
limit=5
|
| 418 |
+
)
|
| 419 |
+
|
| 420 |
+
if result.success and result.data:
|
| 421 |
+
related_datasets[org_name] = result.data.get("datasets", [])
|
| 422 |
+
|
| 423 |
+
self.logger.info(
|
| 424 |
+
"open_data_found",
|
| 425 |
+
organization=org_name,
|
| 426 |
+
datasets_count=len(related_datasets[org_name]),
|
| 427 |
+
investigation_id=context.investigation_id,
|
| 428 |
+
)
|
| 429 |
+
except Exception as e:
|
| 430 |
+
self.logger.warning(
|
| 431 |
+
"open_data_search_failed",
|
| 432 |
+
organization=org_name,
|
| 433 |
+
error=str(e),
|
| 434 |
+
investigation_id=context.investigation_id,
|
| 435 |
+
)
|
| 436 |
+
|
| 437 |
+
# Enrich contracts with open data references
|
| 438 |
+
for contract in contracts_data:
|
| 439 |
+
org_name = contract.get("orgao", {}).get("nome", "")
|
| 440 |
+
if org_name in related_datasets:
|
| 441 |
+
contract["_open_data_available"] = True
|
| 442 |
+
contract["_related_datasets"] = related_datasets[org_name]
|
| 443 |
+
else:
|
| 444 |
+
contract["_open_data_available"] = False
|
| 445 |
+
contract["_related_datasets"] = []
|
| 446 |
+
|
| 447 |
+
return contracts_data
|
| 448 |
+
|
| 449 |
async def _run_anomaly_detection(
|
| 450 |
self,
|
| 451 |
contracts_data: List[Dict[str, Any]],
|