anderson-ufrj commited on
Commit
c78f128
·
1 Parent(s): e9b5c2a

feat(agents): integrate dados.gov.br tool into Zumbi agent

Browse files

- Add DadosGovTool integration for open data enrichment
- Create _enrich_with_open_data method to search related datasets
- Add enable_open_data_enrichment option to InvestigationRequest
- Include open_data_enrichment in agent capabilities
- Enrich contract data with available open datasets references

Files changed (1) hide show
  1. src/agents/zumbi.py +76 -1
src/agents/zumbi.py CHANGED
@@ -26,6 +26,7 @@ from src.core.monitoring import (
26
  from src.tools.transparency_api import TransparencyAPIClient, TransparencyAPIFilter
27
  from src.tools.models_client import ModelsClient, get_models_client
28
  from src.ml.spectral_analyzer import SpectralAnalyzer, SpectralAnomaly
 
29
  import time
30
 
31
 
@@ -53,6 +54,7 @@ class InvestigationRequest(BaseModel):
53
  value_threshold: Optional[float] = PydanticField(default=None, description="Minimum value threshold for contracts")
54
  anomaly_types: Optional[List[str]] = PydanticField(default=None, description="Specific types of anomalies to look for")
55
  max_records: int = PydanticField(default=100, description="Maximum records to analyze")
 
56
 
57
 
58
  class InvestigatorAgent(BaseAgent):
@@ -92,7 +94,8 @@ class InvestigatorAgent(BaseAgent):
92
  "duplicate_contract_detection",
93
  "payment_pattern_analysis",
94
  "spectral_analysis",
95
- "explainable_ai"
 
96
  ],
97
  max_retries=3,
98
  timeout=60
@@ -112,6 +115,9 @@ class InvestigatorAgent(BaseAgent):
112
  # Initialize spectral analyzer for frequency-domain analysis (fallback)
113
  self.spectral_analyzer = SpectralAnalyzer()
114
 
 
 
 
115
  # Anomaly detection methods registry
116
  self.anomaly_detectors = {
117
  "price_anomaly": self._detect_price_anomalies,
@@ -202,6 +208,13 @@ class InvestigatorAgent(BaseAgent):
202
  metadata={"investigation_id": context.investigation_id}
203
  )
204
 
 
 
 
 
 
 
 
205
  # Run anomaly detection
206
  anomalies = await self._run_anomaly_detection(
207
  contracts_data,
@@ -371,6 +384,68 @@ class InvestigatorAgent(BaseAgent):
371
 
372
  return all_contracts[:request.max_records]
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  async def _run_anomaly_detection(
375
  self,
376
  contracts_data: List[Dict[str, Any]],
 
26
  from src.tools.transparency_api import TransparencyAPIClient, TransparencyAPIFilter
27
  from src.tools.models_client import ModelsClient, get_models_client
28
  from src.ml.spectral_analyzer import SpectralAnalyzer, SpectralAnomaly
29
+ from src.tools.dados_gov_tool import DadosGovTool
30
  import time
31
 
32
 
 
54
  value_threshold: Optional[float] = PydanticField(default=None, description="Minimum value threshold for contracts")
55
  anomaly_types: Optional[List[str]] = PydanticField(default=None, description="Specific types of anomalies to look for")
56
  max_records: int = PydanticField(default=100, description="Maximum records to analyze")
57
+ enable_open_data_enrichment: bool = PydanticField(default=True, description="Enable enrichment with dados.gov.br open data")
58
 
59
 
60
  class InvestigatorAgent(BaseAgent):
 
94
  "duplicate_contract_detection",
95
  "payment_pattern_analysis",
96
  "spectral_analysis",
97
+ "explainable_ai",
98
+ "open_data_enrichment"
99
  ],
100
  max_retries=3,
101
  timeout=60
 
115
  # Initialize spectral analyzer for frequency-domain analysis (fallback)
116
  self.spectral_analyzer = SpectralAnalyzer()
117
 
118
+ # Initialize dados.gov.br tool for accessing open data
119
+ self.dados_gov_tool = DadosGovTool()
120
+
121
  # Anomaly detection methods registry
122
  self.anomaly_detectors = {
123
  "price_anomaly": self._detect_price_anomalies,
 
208
  metadata={"investigation_id": context.investigation_id}
209
  )
210
 
211
+ # Enrich data with open data information if available
212
+ if request.enable_open_data_enrichment:
213
+ contracts_data = await self._enrich_with_open_data(
214
+ contracts_data,
215
+ context
216
+ )
217
+
218
  # Run anomaly detection
219
  anomalies = await self._run_anomaly_detection(
220
  contracts_data,
 
384
 
385
  return all_contracts[:request.max_records]
386
 
387
+ async def _enrich_with_open_data(
388
+ self,
389
+ contracts_data: List[Dict[str, Any]],
390
+ context: AgentContext
391
+ ) -> List[Dict[str, Any]]:
392
+ """
393
+ Enrich contract data with information from dados.gov.br.
394
+
395
+ Args:
396
+ contracts_data: Contract records
397
+ context: Agent context
398
+
399
+ Returns:
400
+ Enriched contract data
401
+ """
402
+ # Extract unique organizations from contracts
403
+ organizations = set()
404
+ for contract in contracts_data:
405
+ org_name = contract.get("orgao", {}).get("nome", "")
406
+ if org_name:
407
+ organizations.add(org_name)
408
+
409
+ # Search for related datasets for each organization
410
+ related_datasets = {}
411
+ for org_name in organizations:
412
+ try:
413
+ # Search for datasets from this organization
414
+ result = await self.dados_gov_tool._execute(
415
+ query=f"{org_name}, licitações, contratos",
416
+ action="search",
417
+ limit=5
418
+ )
419
+
420
+ if result.success and result.data:
421
+ related_datasets[org_name] = result.data.get("datasets", [])
422
+
423
+ self.logger.info(
424
+ "open_data_found",
425
+ organization=org_name,
426
+ datasets_count=len(related_datasets[org_name]),
427
+ investigation_id=context.investigation_id,
428
+ )
429
+ except Exception as e:
430
+ self.logger.warning(
431
+ "open_data_search_failed",
432
+ organization=org_name,
433
+ error=str(e),
434
+ investigation_id=context.investigation_id,
435
+ )
436
+
437
+ # Enrich contracts with open data references
438
+ for contract in contracts_data:
439
+ org_name = contract.get("orgao", {}).get("nome", "")
440
+ if org_name in related_datasets:
441
+ contract["_open_data_available"] = True
442
+ contract["_related_datasets"] = related_datasets[org_name]
443
+ else:
444
+ contract["_open_data_available"] = False
445
+ contract["_related_datasets"] = []
446
+
447
+ return contracts_data
448
+
449
  async def _run_anomaly_detection(
450
  self,
451
  contracts_data: List[Dict[str, Any]],