File size: 16,321 Bytes
c1e6a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
"""
Chat Data Integration Service
Connects chat agents with Portal da Transparência data
"""

from typing import Dict, List, Optional, Any
from datetime import datetime, date, timedelta
import re

from src.core import get_logger
from src.services.portal_transparencia_service import portal_transparencia
from src.services.maritaca_client import MaritacaClient, MaritacaModel
from src.core.config import settings

# Logger for this module, obtained from the project's get_logger helper.
logger = get_logger(__name__)


class ChatDataIntegration:
    """Integrates chat requests with real government data."""
    
    def __init__(self):
        """Set up the portal service handle and, when configured, the AI client."""
        # Start without an AI client; _init_ai_client() only replaces this
        # when an API key is available in the settings.
        self.ai_client = None
        self.portal = portal_transparencia
        self._init_ai_client()
        
    def _init_ai_client(self):
        """Create the Maritaca client used to phrase answers, if a key is configured.

        Leaves ``self.ai_client`` untouched when ``settings`` has no (or an
        empty) ``maritaca_api_key``.
        """
        configured_key = getattr(settings, "maritaca_api_key", None)
        if not configured_key:
            return

        # The key may be a secret wrapper exposing get_secret_value() or a
        # plain value; unwrap it only in the former case.
        if hasattr(configured_key, 'get_secret_value'):
            plain_key = configured_key.get_secret_value()
        else:
            plain_key = configured_key

        self.ai_client = MaritacaClient(
            api_key=plain_key,
            model=MaritacaModel.SABIAZINHO_3
        )
            
    async def process_user_query(self, message: str, context: Optional[Dict] = None) -> Dict[str, Any]:
        """
        Process a user query and fetch the data it refers to.

        Args:
            message: User's message
            context: Optional conversation context (not used by this method)

        Returns:
            Dict with the keys "data", "response", "entities" and "data_type".
            When fetching or formatting fails, "data" is None, "response" is a
            generic apology and an additional "error" key holds the exception
            text; "entities" and "data_type" are still included so both
            outcomes share the same shape.
        """
        # Extract entities and classify the request before touching the portal
        entities = await self._extract_entities(message)
        data_type = self._determine_data_type(message)

        logger.info(f"Processing query - Type: {data_type}, Entities: {entities}")

        # One fetcher per data type; all share the (message, entities) signature
        fetchers = {
            "contratos": self._search_contracts,
            "licitacoes": self._search_biddings,
            "despesas": self._search_expenses,
            "servidores": self._search_servants,
            "fornecedor": self._get_supplier_details,
            "analise": self._analyze_patterns,
        }

        try:
            fetch = fetchers.get(data_type)
            if fetch is None:
                # Fallback for a data type without a registered fetcher
                data = {"tipo": "desconhecido", "mensagem": "Não entendi que tipo de dados você procura"}
            else:
                data = await fetch(message, entities)

            # Format response with AI (falls back to plain formatting internally)
            formatted_response = await self._format_response_with_ai(data, message)

        except Exception as e:
            # Boundary handler: report the failure to the caller instead of raising
            logger.error(f"Error processing query: {e}")
            return {
                "data": None,
                "response": "Desculpe, tive um problema ao buscar os dados. Por favor, tente novamente.",
                "entities": entities,
                "data_type": data_type,
                "error": str(e)
            }

        return {
            "data": data,
            "response": formatted_response,
            "entities": entities,
            "data_type": data_type
        }
            
    async def _extract_entities(self, message: str) -> Dict[str, Any]:
        """Extract entities from user message."""
        entities = {}
        
        # Extract CNPJ
        cnpj_match = re.search(r'\b\d{2}\.?\d{3}\.?\d{3}/?\d{4}-?\d{2}\b', message)
        if cnpj_match:
            entities["cnpj"] = re.sub(r'[^\d]', '', cnpj_match.group())
            
        # Extract CPF
        cpf_match = re.search(r'\b\d{3}\.?\d{3}\.?\d{3}-?\d{2}\b', message)
        if cpf_match:
            entities["cpf"] = re.sub(r'[^\d]', '', cpf_match.group())
            
        # Extract dates
        date_patterns = [
            (r'\b(\d{1,2})/(\d{1,2})/(\d{4})\b', '%d/%m/%Y'),
            (r'\b(\d{4})-(\d{1,2})-(\d{1,2})\b', '%Y-%m-%d')
        ]
        
        for pattern, fmt in date_patterns:
            matches = re.findall(pattern, message)
            if matches:
                try:
                    if fmt == '%d/%m/%Y':
                        date_str = f"{matches[0][0]}/{matches[0][1]}/{matches[0][2]}"
                    else:
                        date_str = f"{matches[0][0]}-{matches[0][1]}-{matches[0][2]}"
                    entities["data"] = datetime.strptime(date_str, fmt).date()
                except:
                    pass
                    
        # Extract year
        year_match = re.search(r'\b(20\d{2})\b', message)
        if year_match and "data" not in entities:
            entities["ano"] = int(year_match.group(1))
            
        # Extract monetary values
        value_patterns = [
            r'R\$\s*([\d.,]+)',
            r'([\d.,]+)\s*reais',
            r'([\d.,]+)\s*mil\s*reais'
        ]
        
        for pattern in value_patterns:
            match = re.search(pattern, message, re.IGNORECASE)
            if match:
                value_str = match.group(1).replace('.', '').replace(',', '.')
                try:
                    value = float(value_str)
                    if 'mil' in message.lower():
                        value *= 1000
                    entities["valor"] = value
                except:
                    pass
                break
                
        # Extract agency/organization names
        org_keywords = ["ministério", "secretaria", "prefeitura", "governo", "órgão"]
        for keyword in org_keywords:
            pattern = rf'{keyword}\s+(?:de\s+|da\s+|do\s+)?([A-Za-zÀ-ú\s]+?)(?:\.|,|$)'
            match = re.search(pattern, message, re.IGNORECASE)
            if match:
                entities["orgao"] = match.group(1).strip()
                break
                
        return entities
        
    def _determine_data_type(self, message: str) -> str:
        """Determine what type of data the user is asking for."""
        message_lower = message.lower()
        
        # Keywords for each data type
        keywords = {
            "contratos": ["contrato", "contratos", "contratação", "contratações", "contratou", "contratado"],
            "licitacoes": ["licitação", "licitações", "pregão", "concorrência", "tomada de preço"],
            "despesas": ["despesa", "despesas", "gasto", "gastos", "pagamento", "pagamentos"],
            "servidores": ["servidor", "servidores", "funcionário", "funcionários", "salário", "remuneração"],
            "fornecedor": ["fornecedor", "fornecedores", "empresa", "cnpj"],
            "analise": ["análise", "analisar", "padrão", "padrões", "tendência", "evolução", "comparar"]
        }
        
        # Count matches for each type
        scores = {}
        for data_type, words in keywords.items():
            scores[data_type] = sum(1 for word in words if word in message_lower)
            
        # Return type with highest score
        if max(scores.values()) > 0:
            return max(scores, key=scores.get)
            
        # Default to contracts if no clear match
        return "contratos"
        
    async def _search_contracts(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Search for contracts based on extracted entities."""
        # Build search parameters
        params = {}
        
        if "orgao" in entities:
            # TODO: Map organization name to code
            params["orgao"] = entities["orgao"]
            
        if "cnpj" in entities:
            params["cnpj_fornecedor"] = entities["cnpj"]
            
        if "data" in entities:
            # Search 30 days around the date
            params["data_inicial"] = entities["data"] - timedelta(days=30)
            params["data_final"] = entities["data"] + timedelta(days=30)
        elif "ano" in entities:
            params["data_inicial"] = date(entities["ano"], 1, 1)
            params["data_final"] = date(entities["ano"], 12, 31)
            
        if "valor" in entities:
            # Search 20% range around value
            params["valor_minimo"] = entities["valor"] * 0.8
            params["valor_maximo"] = entities["valor"] * 1.2
            
        # Search contracts
        result = await self.portal.search_contracts(**params, size=20)
        
        return {
            "tipo": "contratos",
            "dados": result["contratos"],
            "total": result["total"],
            "parametros": params
        }
        
    async def _search_biddings(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Search for biddings based on extracted entities."""
        params = {}
        
        if "orgao" in entities:
            params["orgao"] = entities["orgao"]
            
        if "data" in entities:
            params["data_inicial"] = entities["data"] - timedelta(days=30)
            params["data_final"] = entities["data"] + timedelta(days=30)
        elif "ano" in entities:
            params["data_inicial"] = date(entities["ano"], 1, 1)
            params["data_final"] = date(entities["ano"], 12, 31)
            
        result = await self.portal.search_biddings(**params, size=20)
        
        return {
            "tipo": "licitacoes",
            "dados": result["licitacoes"],
            "total": result["total"],
            "parametros": params
        }
        
    async def _search_expenses(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Search for expenses based on extracted entities."""
        params = {}
        
        if "orgao" in entities:
            params["orgao"] = entities["orgao"]
            
        # Determine month/year
        if "data" in entities:
            params["mes_ano"] = entities["data"].strftime("%m/%Y")
        elif "ano" in entities:
            # Get current month for the specified year
            params["mes_ano"] = f"{datetime.now().month:02d}/{entities['ano']}"
            
        result = await self.portal.search_expenses(**params, size=50)
        
        return {
            "tipo": "despesas",
            "dados": result["despesas"],
            "total": result["total"],
            "parametros": params
        }
        
    async def _search_servants(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Search for public servants based on extracted entities."""
        params = {}
        
        # Extract name from message
        name_pattern = r'(?:servidor|funcionário)\s+([A-Za-zÀ-ú\s]+?)(?:\.|,|$|trabalha|recebe)'
        name_match = re.search(name_pattern, message, re.IGNORECASE)
        if name_match:
            params["nome"] = name_match.group(1).strip()
            
        if "cpf" in entities:
            params["cpf"] = entities["cpf"]
            
        if "orgao" in entities:
            params["orgao"] = entities["orgao"]
            
        result = await self.portal.search_public_servants(**params, size=20)
        
        return {
            "tipo": "servidores",
            "dados": result["servidores"],
            "total": result["total"],
            "parametros": params
        }
        
    async def _get_supplier_details(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Get detailed supplier information."""
        if "cnpj" not in entities:
            return {
                "tipo": "fornecedor",
                "erro": "CNPJ não encontrado na mensagem"
            }
            
        result = await self.portal.get_supplier_info(entities["cnpj"])
        
        return {
            "tipo": "fornecedor",
            "dados": result,
            "cnpj": entities["cnpj"]
        }
        
    async def _analyze_patterns(self, message: str, entities: Dict) -> Dict[str, Any]:
        """Analyze spending patterns."""
        params = {}
        
        if "orgao" in entities:
            params["orgao"] = entities["orgao"]
            
        # Determine period
        if "ano" in entities:
            params["periodo_meses"] = 12
        else:
            params["periodo_meses"] = 6  # Default to 6 months
            
        result = await self.portal.analyze_spending_patterns(**params)
        
        return {
            "tipo": "analise",
            "dados": result
        }
        
    async def _format_response_with_ai(self, data: Dict, original_query: str) -> str:
        """Format the data response using AI."""
        if not self.ai_client or not data.get("dados"):
            return self._format_response_simple(data)
            
        try:
            # Prepare context for AI
            system_prompt = """Você é um assistente especializado em transparência pública.
            Sua tarefa é explicar dados governamentais de forma clara e acessível.
            Use linguagem simples, destaque informações importantes e sempre seja preciso com valores e datas.
            Se encontrar possíveis irregularidades, mencione-as de forma objetiva."""
            
            # Prepare data summary
            if data["tipo"] == "contratos":
                data_summary = f"Encontrei {data.get('total', 0)} contratos. "
                if data.get("dados"):
                    data_summary += "Aqui estão os principais: "
                    for i, contract in enumerate(data["dados"][:3]):
                        data_summary += f"\n{i+1}. {contract.get('objeto', 'Sem descrição')} - "
                        data_summary += f"R$ {contract.get('valorTotal', 0):,.2f} - "
                        data_summary += f"Fornecedor: {contract.get('nomeFantasiaFornecedor', 'Não informado')}"
                        
            elif data["tipo"] == "analise":
                analysis = data["dados"]
                data_summary = f"Análise de gastos do período {analysis['periodo']['inicio']} a {analysis['periodo']['fim']}: "
                data_summary += f"\n- Total de contratos: {analysis['total_contratos']}"
                data_summary += f"\n- Valor total: R$ {analysis['valor_total_contratos']:,.2f}"
                data_summary += f"\n- Fornecedores únicos: {analysis['fornecedores_unicos']}"
                if analysis.get("alertas"):
                    data_summary += "\n\nAlertas encontrados:"
                    for alert in analysis["alertas"]:
                        data_summary += f"\n⚠️ {alert['mensagem']}"
                        
            else:
                data_summary = f"Encontrei {data.get('total', 0)} registros do tipo {data['tipo']}"
                
            # Generate AI response
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"O usuário perguntou: '{original_query}'\n\nDados encontrados:\n{data_summary}\n\nExplique esses dados de forma clara e útil."}
            ]
            
            response = await self.ai_client.chat_completion(
                messages=messages,
                max_tokens=500,
                temperature=0.7
            )
            
            return response.content
            
        except Exception as e:
            logger.error(f"Error formatting with AI: {e}")
            return self._format_response_simple(data)
            
    def _format_response_simple(self, data: Dict) -> str:
        """Simple formatting without AI."""
        if not data.get("dados"):
            return "Não encontrei dados com os critérios especificados. Tente refinar sua busca."
            
        response = f"Encontrei {data.get('total', 0)} registros.\n\n"
        
        if data["tipo"] == "contratos" and data.get("dados"):
            response += "Principais contratos:\n"
            for i, contract in enumerate(data["dados"][:5], 1):
                response += f"{i}. {contract.get('objeto', 'Sem descrição')[:100]}...\n"
                response += f"   Valor: R$ {contract.get('valorTotal', 0):,.2f}\n"
                response += f"   Fornecedor: {contract.get('nomeFantasiaFornecedor', 'Não informado')}\n\n"
                
        return response


# Shared module-level instance, created when this module is imported.
# Constructing it runs ChatDataIntegration.__init__, which binds the portal
# service and builds the AI client if an API key is configured in settings.
chat_data_integration = ChatDataIntegration()