File size: 18,372 Bytes
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bcdb25
 
 
 
 
 
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc1e705
 
 
 
 
 
 
f317ea3
 
dc1e705
 
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eccaf5b
 
 
824bf31
eccaf5b
824bf31
eccaf5b
 
824bf31
 
 
 
 
eccaf5b
824bf31
 
 
eccaf5b
 
 
 
 
 
 
 
 
 
 
824bf31
 
 
 
 
 
f70869e
 
 
 
 
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190953c
 
 
 
 
 
 
 
 
 
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bcdb25
 
824bf31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""
Module: core.config
Description: Application configuration management
Author: Anderson H. Silva
Date: 2025-01-24
License: Proprietary - All rights reserved
"""

from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional
import asyncio
import os

from pydantic import Field, SecretStr, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

# Import will be available after initialization
from .secret_manager import SecretManager
from .vault_client import VaultConfig


class Settings(BaseSettings):
    """Application settings with environment variable support.

    Values are read from the process environment and from a ``.env`` file
    (UTF-8), matching names case-insensitively and ignoring unknown keys.
    ``database_url``, ``secret_key`` and ``jwt_secret_key`` declare no default
    and therefore have to be supplied. Secrets can alternatively be loaded
    through :meth:`from_vault`.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Application
    app_name: str = Field(default="cidadao-ai", description="Application name")
    app_env: str = Field(default="development", description="Environment")
    app_version: str = Field(default="1.0.0", description="Version")
    debug: bool = Field(default=False, description="Debug mode")
    log_level: str = Field(default="INFO", description="Logging level")

    # Server
    host: str = Field(default="0.0.0.0", description="Server host")
    port: int = Field(default=8000, description="Server port")
    workers: int = Field(default=1, description="Number of workers")

    # Database
    database_url: str = Field(
        description="Database connection URL (REQUIRED)"
    )
    database_pool_size: int = Field(default=10, description="DB pool size")
    database_pool_overflow: int = Field(default=20, description="DB pool overflow")
    database_pool_timeout: int = Field(default=30, description="DB pool timeout")

    # Redis
    redis_url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL"
    )
    redis_password: Optional[SecretStr] = Field(default=None, description="Redis password")
    redis_pool_size: int = Field(default=10, description="Redis pool size")

    # Portal Transparência API
    transparency_api_key: Optional[SecretStr] = Field(
        default=None,
        description="Portal da Transparência API key"
    )
    transparency_api_base_url: str = Field(
        default="https://api.portaldatransparencia.gov.br",
        description="Portal da Transparência base URL"
    )
    transparency_api_timeout: int = Field(default=30, description="API timeout")
    transparency_api_max_retries: int = Field(default=3, description="Max retries")
    transparency_api_header_key: str = Field(
        default="chave-api-dados",
        description="Portal da Transparência API header key name"
    )

    # Dados.gov.br API Configuration
    dados_gov_api_key: Optional[SecretStr] = Field(
        default=None,
        description="Dados.gov.br API key (if required)"
    )

    # LLM Configuration
    llm_provider: str = Field(
        default="groq",
        description="LLM provider (groq, together, huggingface)"
    )
    llm_model_name: str = Field(
        default="mixtral-8x7b-32768",
        description="LLM model name"
    )
    llm_temperature: float = Field(default=0.7, description="LLM temperature")
    llm_max_tokens: int = Field(default=2048, description="Max tokens")
    llm_top_p: float = Field(default=0.9, description="Top-p sampling")
    llm_stream: bool = Field(default=True, description="Enable streaming")

    # Provider API Keys
    groq_api_key: Optional[SecretStr] = Field(default=None, description="Groq API key")
    groq_api_base_url: str = Field(
        default="https://api.groq.com/openai/v1",
        description="Groq base URL"
    )

    together_api_key: Optional[SecretStr] = Field(default=None, description="Together API key")
    together_api_base_url: str = Field(
        default="https://api.together.xyz/v1",
        description="Together base URL"
    )

    huggingface_api_key: Optional[SecretStr] = Field(default=None, description="HuggingFace API key")
    huggingface_model_id: str = Field(
        default="mistralai/Mistral-7B-Instruct-v0.2",
        description="HuggingFace model ID"
    )

    # Maritaca AI Configuration
    maritaca_api_key: Optional[SecretStr] = Field(default=None, description="Maritaca AI API key")
    maritaca_api_base_url: str = Field(
        default="https://chat.maritaca.ai/api",
        description="Maritaca AI base URL"
    )
    maritaca_model: str = Field(
        default="sabiazinho-3",
        description="Default Maritaca AI model (sabiazinho-3, sabia-3, sabia-3-medium, sabia-3-large)"
    )

    # Vector Store
    vector_store_type: str = Field(
        default="faiss",
        description="Vector store type (faiss, chromadb)"
    )
    embedding_model: str = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        description="Embedding model"
    )
    embedding_dimension: int = Field(default=384, description="Embedding dimension")
    vector_index_path: Path = Field(
        default=Path("./vector_store/index.faiss"),
        description="Vector index path"
    )

    # ChromaDB
    chroma_persist_directory: Path = Field(
        default=Path("./chroma_db"),
        description="ChromaDB persist directory"
    )
    chroma_collection_name: str = Field(
        default="cidadao_memory",
        description="ChromaDB collection name"
    )

    # Security - REQUIRED in production
    secret_key: SecretStr = Field(
        description="Application secret key (REQUIRED)"
    )
    jwt_secret_key: SecretStr = Field(
        description="JWT secret key (REQUIRED)"
    )
    jwt_algorithm: str = Field(default="HS256", description="JWT algorithm")
    jwt_access_token_expire_minutes: int = Field(default=30, description="Access token expiry")
    jwt_refresh_token_expire_days: int = Field(default=7, description="Refresh token expiry")
    bcrypt_rounds: int = Field(default=12, description="Bcrypt rounds")

    # CORS
    cors_origins: List[str] = Field(
        default=[
            "http://localhost:3000",
            "http://localhost:3001",
            "http://localhost:8000",
            "http://127.0.0.1:3000",
            "https://cidadao-ai-frontend.vercel.app",
            "https://cidadao-ai.vercel.app",
            "https://*.vercel.app",
            "https://neural-thinker-cidadao-ai-backend.hf.space",
            "https://*.hf.space"
        ],
        description="CORS allowed origins"
    )
    cors_allow_credentials: bool = Field(default=True, description="Allow credentials")
    cors_allow_methods: List[str] = Field(
        default=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"],
        description="Allowed methods"
    )
    cors_allow_headers: List[str] = Field(default=["*"], description="Allowed headers")
    cors_exposed_headers: List[str] = Field(
        default=[
            "X-RateLimit-Limit",
            "X-RateLimit-Remaining",
            "X-RateLimit-Reset",
            "X-Request-ID",
            "X-Total-Count"
        ],
        description="Exposed headers"
    )
    cors_max_age: int = Field(default=3600, description="CORS max age in seconds")

    # Rate Limiting
    rate_limit_per_minute: int = Field(default=60, description="Rate limit per minute")
    rate_limit_per_hour: int = Field(default=1000, description="Rate limit per hour")
    rate_limit_per_day: int = Field(default=10000, description="Rate limit per day")

    # IP Whitelist
    ip_whitelist_enabled: bool = Field(default=True, description="Enable IP whitelist in production")
    ip_whitelist_strict: bool = Field(default=False, description="Strict mode - reject if IP unknown")
    ip_whitelist_cache_ttl: int = Field(default=300, description="IP whitelist cache TTL seconds")

    # Celery
    celery_broker_url: str = Field(
        default="redis://localhost:6379/1",
        description="Celery broker URL"
    )
    celery_result_backend: str = Field(
        default="redis://localhost:6379/2",
        description="Celery result backend"
    )
    celery_task_serializer: str = Field(default="json", description="Task serializer")
    celery_result_serializer: str = Field(default="json", description="Result serializer")
    celery_accept_content: List[str] = Field(default=["json"], description="Accept content")
    celery_timezone: str = Field(default="America/Sao_Paulo", description="Timezone")
    celery_enable_utc: bool = Field(default=True, description="Enable UTC")

    # Monitoring
    enable_metrics: bool = Field(default=True, description="Enable metrics")
    prometheus_port: int = Field(default=9090, description="Prometheus port")
    grafana_port: int = Field(default=3000, description="Grafana port")

    # OpenTelemetry
    otel_service_name: str = Field(default="cidadao-ai", description="Service name")
    otel_exporter_otlp_endpoint: str = Field(
        default="http://localhost:4317",
        description="OTLP endpoint"
    )
    otel_exporter_otlp_insecure: bool = Field(default=True, description="OTLP insecure")
    otel_traces_exporter: str = Field(default="otlp", description="Traces exporter")
    otel_metrics_exporter: str = Field(default="otlp", description="Metrics exporter")
    otel_logs_exporter: str = Field(default="otlp", description="Logs exporter")

    # Audit
    audit_log_enabled: bool = Field(default=True, description="Enable audit logging")
    audit_log_path: Path = Field(
        default=Path("./audit_logs"),
        description="Audit log path"
    )
    audit_log_rotation: str = Field(default="daily", description="Log rotation")
    audit_log_retention_days: int = Field(default=90, description="Log retention days")
    audit_hash_algorithm: str = Field(default="sha256", description="Hash algorithm")

    # Models API Configuration
    models_api_enabled: bool = Field(default=True, description="Enable models API")
    models_api_url: str = Field(
        default="https://neural-thinker-cidadao-ai-models.hf.space",
        description="Models API URL"
    )
    models_api_timeout: int = Field(default=30, description="Models API timeout seconds")
    models_fallback_local: bool = Field(default=True, description="Use local ML as fallback")
    models_circuit_breaker_failures: int = Field(default=3, description="Max failures before circuit break")

    # ML Configuration
    anomaly_detection_threshold: float = Field(
        default=0.8,
        description="Anomaly detection threshold"
    )
    clustering_min_samples: int = Field(default=5, description="Min clustering samples")
    time_series_seasonality: str = Field(default="yearly", description="Seasonality")
    explainer_max_samples: int = Field(default=100, description="Max explainer samples")

    # Cache
    cache_ttl_seconds: int = Field(default=3600, description="Cache TTL")
    cache_max_size: int = Field(default=1000, description="Max cache size")

    # Compression
    compression_enabled: bool = Field(default=True, description="Enable response compression")
    compression_min_size: int = Field(default=1024, description="Min size to compress (bytes)")
    compression_gzip_level: int = Field(default=6, description="Gzip compression level (1-9)")
    compression_brotli_quality: int = Field(default=4, description="Brotli quality (0-11)")
    compression_algorithms: List[str] = Field(
        default=["gzip", "br", "deflate"],
        description="Enabled compression algorithms"
    )

    # Feature Flags
    enable_fine_tuning: bool = Field(default=False, description="Enable fine-tuning")
    enable_autonomous_crawling: bool = Field(default=False, description="Enable crawling")
    enable_advanced_visualizations: bool = Field(default=False, description="Advanced viz")
    enable_ethics_guard: bool = Field(default=True, description="Enable ethics guard")

    # Development
    enable_debug_toolbar: bool = Field(default=True, description="Debug toolbar")
    enable_sql_echo: bool = Field(default=False, description="SQL echo")
    enable_profiling: bool = Field(default=False, description="Enable profiling")

    @field_validator("app_env")
    @classmethod
    def validate_environment(cls, v: str) -> str:
        """Validate environment value.

        Raises:
            ValueError: If ``v`` is not one of the allowed environment names
                (the comparison is case-sensitive).
        """
        allowed = ["development", "staging", "production", "testing"]
        if v not in allowed:
            raise ValueError(f"app_env must be one of {allowed}")
        return v

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Validate log level and return it upper-cased.

        Raises:
            ValueError: If the upper-cased value is not a known level name.
        """
        allowed = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        v = v.upper()
        if v not in allowed:
            raise ValueError(f"log_level must be one of {allowed}")
        return v

    @property
    def is_development(self) -> bool:
        """Check if in development mode."""
        return self.app_env == "development"

    @property
    def is_production(self) -> bool:
        """Check if in production mode."""
        return self.app_env == "production"

    @property
    def is_testing(self) -> bool:
        """Check if in testing mode."""
        return self.app_env == "testing"

    def get_database_url(self, async_mode: bool = True) -> str:
        """Get database URL for async or sync mode.

        Args:
            async_mode: When true, a URL starting with ``postgresql://`` is
                returned with the ``postgresql+asyncpg://`` scheme instead.

        Returns:
            The (possibly rewritten) database URL. URLs with any other scheme
            are returned unchanged.
        """
        if async_mode and self.database_url.startswith("postgresql://"):
            # count=1: rewrite only the leading scheme, never a later
            # occurrence of the same text (e.g. inside a query parameter).
            return self.database_url.replace(
                "postgresql://", "postgresql+asyncpg://", 1
            )
        return self.database_url

    def dict_for_logging(self) -> Dict[str, Any]:
        """Get safe dict for logging (no secrets).

        Returns:
            The ``model_dump()`` of the settings in which every sensitive
            value is replaced by the string ``"***REDACTED***"``.
        """
        # Names that are always masked, even when the value is None or a plain
        # string (database_url is a str that may embed credentials).
        always_redacted = {
            "secret_key", "jwt_secret_key", "transparency_api_key",
            "groq_api_key", "together_api_key", "huggingface_api_key",
            "maritaca_api_key", "dados_gov_api_key",
            "redis_password", "database_url",
        }
        # The SecretStr check is a safety net: a secret field added later is
        # masked even if nobody remembers to extend the set above.
        return {
            name: (
                "***REDACTED***"
                if name in always_redacted or isinstance(value, SecretStr)
                else value
            )
            for name, value in self.model_dump().items()
        }

    @classmethod
    async def from_vault(cls, vault_config: Optional[VaultConfig] = None) -> "Settings":
        """
        Create Settings instance with secrets loaded from Vault

        This method initializes a SecretManager with Vault integration
        and loads secrets with proper fallback to environment variables.

        Args:
            vault_config: Vault connection settings. When omitted, a
                VaultConfig is built from the VAULT_URL, VAULT_TOKEN,
                VAULT_NAMESPACE, VAULT_SECRET_PATH, VAULT_FALLBACK_TO_ENV and
                VAULT_REQUIRE environment variables.

        Returns:
            A Settings instance that keeps a reference to the SecretManager
            used to load it; release it with ``close_vault_connection()``.
        """
        # Create vault config from environment if not provided
        if vault_config is None:
            vault_config = VaultConfig(
                url=os.getenv("VAULT_URL", "http://localhost:8200"),
                token=os.getenv("VAULT_TOKEN"),
                namespace=os.getenv("VAULT_NAMESPACE"),
                secret_path=os.getenv("VAULT_SECRET_PATH", "secret/cidadao-ai"),
                fallback_to_env=os.getenv("VAULT_FALLBACK_TO_ENV", "true").lower() == "true",
                require_vault=os.getenv("VAULT_REQUIRE", "false").lower() == "true"
            )

        # Initialize secret manager
        secret_manager = SecretManager(vault_config)
        await secret_manager.initialize()

        # Load all secret schemas
        database_secrets = await secret_manager.get_secrets_schema("database")
        jwt_secrets = await secret_manager.get_secrets_schema("jwt")
        api_secrets = await secret_manager.get_secrets_schema("api_keys")
        app_secrets = await secret_manager.get_secrets_schema("application")
        redis_secrets = await secret_manager.get_secrets_schema("redis")
        # NOTE(review): fetched but not mapped onto any field below; kept so
        # the sequence of SecretManager calls stays unchanged.
        infra_secrets = await secret_manager.get_secrets_schema("infrastructure")

        # Build configuration data
        config_data: Dict[str, Any] = {}

        # Core application
        if app_secrets and app_secrets.secret_key:
            config_data["secret_key"] = app_secrets.secret_key

        # JWT configuration
        if jwt_secrets:
            if jwt_secrets.secret_key:
                config_data["jwt_secret_key"] = jwt_secrets.secret_key
            config_data["jwt_algorithm"] = jwt_secrets.algorithm
            config_data["jwt_access_token_expire_minutes"] = jwt_secrets.access_token_expire_minutes
            config_data["jwt_refresh_token_expire_days"] = jwt_secrets.refresh_token_expire_days

        # Database configuration
        if database_secrets and database_secrets.url:
            config_data["database_url"] = database_secrets.url

        # Redis configuration
        if redis_secrets:
            # Only override when a URL is actually present, mirroring the
            # database handling; an empty value would otherwise replace the
            # default and fail validation.
            if redis_secrets.url:
                config_data["redis_url"] = redis_secrets.url
            if redis_secrets.password:
                config_data["redis_password"] = redis_secrets.password

        # API Keys
        if api_secrets:
            api_key_fields = (
                "transparency_api_key",
                "groq_api_key",
                "together_api_key",
                "huggingface_api_key",
                "dados_gov_api_key",
            )
            for field_name in api_key_fields:
                # getattr with a default tolerates schemas lacking a key.
                key_value = getattr(api_secrets, field_name, None)
                if key_value:
                    config_data[field_name] = key_value

        # Create Settings instance with secrets
        # Environment variables will still be used for non-secret configuration
        settings = cls(**config_data)

        # Store reference to secret manager for cleanup
        settings._secret_manager = secret_manager

        return settings

    async def close_vault_connection(self):
        """Close Vault connection if it exists.

        Does nothing for instances that were not created via ``from_vault``.
        """
        secret_manager = getattr(self, "_secret_manager", None)
        if secret_manager:
            await secret_manager.close()


@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide Settings object.

    The function takes no arguments, so the ``lru_cache`` wrapper builds
    ``Settings()`` on the first call and hands back that same instance on
    every later call.
    """
    cached_instance = Settings()
    return cached_instance


async def get_settings_with_vault(vault_config: Optional[VaultConfig] = None) -> Settings:
    """Build a Settings instance whose secrets come from Vault.

    Thin wrapper that awaits ``Settings.from_vault`` with the given
    ``vault_config`` (which may be ``None``) and returns its result.
    """
    vault_backed_settings = await Settings.from_vault(vault_config)
    return vault_backed_settings


# Global settings instance, created at import time through the cached
# get_settings() factory, so importing this module constructs Settings().
settings = get_settings()