<!-- Source: cidadao.ai-backend / docs/architecture/MARITACA_OPTIMIZATION_GUIDE.md
     Author: anderson-ufrj — commit 92d464e: "refactor: complete repository reorganization and documentation update" -->
# 🚀 Guia de Otimização Maritaca AI - Cidadão.AI
## Resumo das Melhorias
### 1. Novo Endpoint Otimizado
- **URL**: `/api/v1/chat/optimized`
- **Modelo**: Sabiazinho-3 (mais econômico)
- **Persona**: Carlos Drummond de Andrade
- **Economia**: ~40-50% menor custo por requisição
### 2. Comparação de Modelos
| Modelo | Custo | Qualidade | Tempo Resposta | Uso Recomendado |
|--------|-------|-----------|----------------|-----------------|
| Sabiazinho-3 | 💰 | ⭐⭐⭐⭐ | 1-5s | Conversas gerais, saudações |
| Sabiá-3 | 💰💰💰 | ⭐⭐⭐⭐⭐ | 3-15s | Análises complexas |
### 3. Endpoints Disponíveis
```bash
# 1. Simple (Sabiá-3) - FUNCIONANDO 100%
POST /api/v1/chat/simple
# 2. Stable (Multi-fallback) - NOVO
POST /api/v1/chat/stable
# 3. Optimized (Sabiazinho-3 + Drummond) - NOVO
POST /api/v1/chat/optimized
```
## Integração Frontend - Versão Otimizada
### Serviço de Chat Atualizado
```typescript
// services/chatService.ts
/** One backend chat endpoint the service can try, in fallback order. */
export interface ChatEndpoint {
/** Relative API path, e.g. '/api/v1/chat/optimized'. */
url: string;
/** Human-readable label used in logs. */
name: string;
/** Fallback priority (1 = preferred; the array is stored in this order). */
priority: number;
/** Model served by this endpoint: 'sabiazinho-3', 'sabia-3', or 'mixed'. */
model: string;
}
/**
 * Chat client that tries multiple backend endpoints in order and falls back
 * to a canned offline response when every endpoint fails.
 */
export class ChatService {
  private readonly API_URL = process.env.NEXT_PUBLIC_API_URL

  // Default fallback order: cheapest model first (see table above).
  private endpoints: ChatEndpoint[] = [
    {
      url: '/api/v1/chat/optimized',
      name: 'Optimized (Sabiazinho)',
      priority: 1,
      model: 'sabiazinho-3'
    },
    {
      url: '/api/v1/chat/simple',
      name: 'Simple (Sabiá-3)',
      priority: 2,
      model: 'sabia-3'
    },
    {
      url: '/api/v1/chat/stable',
      name: 'Stable (Fallback)',
      priority: 3,
      model: 'mixed'
    }
  ]

  /**
   * Sends `message`, trying each endpoint until one returns a 2xx response.
   *
   * @param message User text to send.
   * @param options.preferredModel 'economic' tries Sabiazinho first,
   *   'quality' tries Sabiá-3 first; omitted = default order.
   * @param options.useDrummond Enables the Drummond persona on the optimized
   *   endpoint (defaults to true there; ignored by the other endpoints).
   * @returns The backend payload, or a local fallback response when all
   *   endpoints fail.
   */
  async sendMessage(
    message: string,
    options?: {
      preferredModel?: 'economic' | 'quality';
      useDrummond?: boolean;
    }
  ): Promise<ChatResponse> {
    const sessionId = `session_${Date.now()}`

    // Reorder a copy of the endpoint list by model preference.
    // FIX: the original comparator returned -1/1 from `a` alone, which is
    // not a valid (antisymmetric) comparator and gave engine-dependent
    // order; this form compares both elements.
    const selectedEndpoints = [...this.endpoints]
    const preferredModel =
      options?.preferredModel === 'economic' ? 'sabiazinho-3'
      : options?.preferredModel === 'quality' ? 'sabia-3'
      : null
    if (preferredModel) {
      selectedEndpoints.sort(
        (a, b) =>
          Number(b.model === preferredModel) - Number(a.model === preferredModel)
      )
    }

    // Try endpoints in order until one succeeds.
    for (const endpoint of selectedEndpoints) {
      try {
        const body: Record<string, unknown> = { message, session_id: sessionId }
        // The Drummond persona flag is only understood by /optimized.
        if (endpoint.url.includes('optimized')) {
          body.use_drummond = options?.useDrummond ?? true
        }

        const response = await fetch(`${this.API_URL}${endpoint.url}`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(body)
        })

        if (response.ok) {
          const data = await response.json()
          console.log(`✅ Success with ${endpoint.name}`)
          return data
        }
        // Non-2xx responses were silently skipped before; log and continue.
        console.warn(`Failed ${endpoint.name}: HTTP ${response.status}`)
      } catch (error) {
        console.warn(`Failed ${endpoint.name}:`, error)
      }
    }

    // Ultimate fallback: every endpoint failed.
    return {
      message: 'Desculpe, estou temporariamente indisponível.',
      session_id: sessionId,
      agent_name: 'Sistema',
      agent_id: 'system',
      confidence: 0,
      metadata: { fallback: true }
    }
  }

  /**
   * Heuristic model router: 'complex' when the message contains an analysis
   * keyword or exceeds 100 characters, otherwise 'simple'.
   */
  analyzeComplexity(message: string): 'simple' | 'complex' {
    const complexKeywords = [
      'analise', 'investigue', 'compare', 'tendência',
      'padrão', 'anomalia', 'detalhe', 'relatório'
    ]
    const hasComplexKeyword = complexKeywords.some(
      keyword => message.toLowerCase().includes(keyword)
    )
    return hasComplexKeyword || message.length > 100
      ? 'complex'
      : 'simple'
  }
}
```
### Componente Inteligente
```tsx
// components/SmartChat.tsx
/**
 * Chat UI that lets the user pick a model preference (auto/economic/quality)
 * and routes each message through ChatService with that preference.
 */
export function SmartChat() {
  const [messages, setMessages] = useState<Message[]>([])
  const [modelPreference, setModelPreference] =
    useState<'auto' | 'economic' | 'quality'>('auto')
  // NOTE(review): a new service instance per render matches the original;
  // wrap in useMemo/useRef if ChatService ever holds per-instance state.
  const chatService = new ChatService()

  const handleSendMessage = async (text: string) => {
    // Echo the user's message immediately.
    const userMessage = createUserMessage(text)
    setMessages(prev => [...prev, userMessage])

    // In 'auto' mode the complexity heuristic picks the model; otherwise the
    // explicit preference is used directly. (The original's
    // `else if (modelPreference !== 'auto')` was redundant — always true in
    // the else branch — so `preference` is in fact always defined.)
    const preference: 'economic' | 'quality' =
      modelPreference === 'auto'
        ? (chatService.analyzeComplexity(text) === 'simple' ? 'economic' : 'quality')
        : modelPreference

    // Send with the selected model preference.
    const response = await chatService.sendMessage(text, {
      preferredModel: preference,
      useDrummond: true // Enable cultural persona
    })

    // Attach routing metadata so the UI can show which model answered.
    const assistantMessage = {
      ...createAssistantMessage(response),
      metadata: {
        ...response.metadata,
        model_preference: preference,
        actual_model: response.model_used
      }
    }
    setMessages(prev => [...prev, assistantMessage])

    // Log for monitoring.
    logChatMetrics({
      model_used: response.model_used,
      response_time: response.metadata?.response_time_ms,
      tokens: response.metadata?.tokens_used,
      success: true
    })
  }

  return (
    <div className="smart-chat">
      {/* Model preference selector */}
      <div className="model-selector">
        <label>Modo:</label>
        <select
          value={modelPreference}
          onChange={(e) => setModelPreference(e.target.value as any)}
        >
          <option value="auto">Automático</option>
          <option value="economic">Econômico (Sabiazinho)</option>
          <option value="quality">Qualidade (Sabiá-3)</option>
        </select>
      </div>
      {/* Chat messages */}
      <MessageList messages={messages} />
      {/* Input */}
      <ChatInput onSend={handleSendMessage} />
      {/* Status indicator */}
      <ChatStatus
        lastModel={messages[messages.length - 1]?.metadata?.actual_model}
        preference={modelPreference}
      />
    </div>
  )
}
```
## Otimizações de Custo
### 1. Cache Inteligente
```typescript
/**
 * ChatService decorator that memoizes high-confidence answers so repeated
 * questions don't hit the (paid) model again.
 */
class CachedChatService extends ChatService {
  // Cached entries expire after this many milliseconds.
  // NOTE(review): 5 minutes is a placeholder TTL — tune for production.
  private static readonly TTL_MS = 5 * 60 * 1000

  private cache = new Map<string, CachedResponse>()

  async sendMessage(message: string, options?: any) {
    // Check cache for common questions.
    const cacheKey = this.normalizeMessage(message)
    const cached = this.cache.get(cacheKey)
    if (cached && !this.isExpired(cached)) {
      return {
        ...cached.response,
        metadata: {
          ...cached.response.metadata,
          from_cache: true
        }
      }
    }

    // Get fresh response.
    const response = await super.sendMessage(message, options)

    // Cache only confident answers so errors/fallbacks aren't replayed.
    if (response.confidence > 0.8) {
      this.cache.set(cacheKey, {
        response,
        timestamp: Date.now()
      })
    }
    return response
  }

  // Case/whitespace-insensitive key so trivial variations share one entry.
  // (FIX: called but never defined in the original example.)
  private normalizeMessage(message: string): string {
    return message.trim().toLowerCase().replace(/\s+/g, ' ')
  }

  // True once an entry is older than the TTL.
  // (FIX: called but never defined in the original example.)
  private isExpired(entry: CachedResponse): boolean {
    return Date.now() - entry.timestamp > CachedChatService.TTL_MS
  }
}
```
### 2. Batching de Requisições
```typescript
/**
 * ChatService variant that coalesces messages sent within a 100 ms window
 * into batches of up to 5 requests.
 *
 * NOTE(review): relies on a `sendBatch` method that is not defined in this
 * example — it must be implemented against a real batch API endpoint.
 */
class BatchedChatService extends ChatService {
  private queue: QueuedMessage[] = []
  private timer: NodeJS.Timeout | null = null

  async sendMessage(message: string, options?: any) {
    return new Promise((resolve) => {
      this.queue.push({ message, options, resolve })
      if (!this.timer) {
        this.timer = setTimeout(() => this.processBatch(), 100)
      }
    })
  }

  private async processBatch() {
    const batch = this.queue.splice(0, 5) // Max 5 per batch
    // FIX: clear/re-arm the timer BEFORE awaiting. The original nulled it
    // only after the batch completed, so when >5 messages were queued the
    // remainder was stranded until the next sendMessage call arrived.
    this.timer = null
    if (this.queue.length > 0) {
      this.timer = setTimeout(() => this.processBatch(), 100)
    }

    try {
      // Send all at once (if API supports).
      const responses = await this.sendBatch(batch)
      // Resolve individual promises.
      batch.forEach((item, index) => {
        item.resolve(responses[index])
      })
    } catch (error) {
      // FIX: never leave callers' promises pending forever — fall back to
      // individual sends when the batch call fails.
      for (const item of batch) {
        item.resolve(await super.sendMessage(item.message, item.options))
      }
    }
  }
}
```
## Métricas e Monitoramento
```typescript
// utils/chatMetrics.ts
/**
 * Aggregates per-request chat metrics (model usage, latency, tokens, errors)
 * and estimates cumulative model spend.
 */
export class ChatMetricsCollector {
  private metrics = {
    totalRequests: 0,
    modelUsage: new Map<string, number>(),
    avgResponseTime: 0, // running mean of response_time, ms
    totalTokens: 0,
    errorRate: 0,       // fraction of recorded requests with `error` set
    cacheHitRate: 0     // NOTE(review): never updated here; needs a cache-hit signal
  }

  /** Records one chat interaction and forwards it to analytics if present. */
  recordMetric(data: ChatMetric) {
    this.metrics.totalRequests++

    // Track model usage.
    const model = data.model_used || 'unknown'
    this.metrics.modelUsage.set(
      model,
      (this.metrics.modelUsage.get(model) || 0) + 1
    )

    // Update running aggregates.
    this.updateAverages(data)

    // FIX: guard `window` so this also works outside the browser
    // (SSR/tests) — the bare `window.gtag` check threw a ReferenceError
    // when `window` was undefined.
    if (typeof window !== 'undefined' && (window as any).gtag) {
      (window as any).gtag('event', 'chat_interaction', {
        model_used: model,
        response_time: data.response_time,
        success: !data.error
      })
    }
  }

  // Incrementally folds one sample into the running aggregates.
  // (FIX: called but never defined in the original example.)
  private updateAverages(data: ChatMetric) {
    const n = this.metrics.totalRequests
    const time = data.response_time ?? 0
    // Incremental mean: avg += (x - avg) / n.
    this.metrics.avgResponseTime += (time - this.metrics.avgResponseTime) / n
    // assumes ChatMetric carries a `tokens` count — TODO confirm shape
    this.metrics.totalTokens += data.tokens ?? 0
    const failed = data.error ? 1 : 0
    this.metrics.errorRate += (failed - this.metrics.errorRate) / n
  }

  /** Rough cumulative cost from flat per-request rates per model. */
  getCostEstimate(): number {
    const sabiazinhoCost = 0.001 // per request
    const sabia3Cost = 0.003 // per request
    const sabiazinhoCount = this.metrics.modelUsage.get('sabiazinho-3') || 0
    const sabia3Count = this.metrics.modelUsage.get('sabia-3') || 0
    return (sabiazinhoCount * sabiazinhoCost) + (sabia3Count * sabia3Cost)
  }

  /** Snapshot of all aggregates plus derived cost and model distribution. */
  getReport() {
    return {
      ...this.metrics,
      estimatedCost: this.getCostEstimate(),
      modelDistribution: Object.fromEntries(this.metrics.modelUsage)
    }
  }
}
```
## Recomendações de Uso
### Para o Frontend:
1. **Perguntas Simples/Saudações**: Use Sabiazinho (economic mode)
2. **Análises Complexas**: Use Sabiá-3 (quality mode)
3. **Auto Mode**: Deixa o sistema decidir com base na complexidade da mensagem
### Economia Estimada:
- Conversas simples: 40-50% economia usando Sabiazinho
- Mix típico (70% simples, 30% complexo): ~35% economia total
- Com cache: 10-20% de economia adicional
### Próximos Passos:
1. Implementar cache para perguntas frequentes
2. Adicionar análise de sentimento para ajustar tom
3. Criar dashboards de custo em tempo real
4. A/B testing entre modelos