|
|
""" |
|
|
Module: core.config |
|
|
Description: Application configuration management |
|
|
Author: Anderson H. Silva |
|
|
Date: 2025-01-24 |
|
|
License: Proprietary - All rights reserved |
|
|
""" |
|
|
|
|
|
from functools import lru_cache |
|
|
from pathlib import Path |
|
|
from typing import Any, Dict, List, Optional |
|
|
import asyncio |
|
|
import os |
|
|
|
|
|
from pydantic import Field, SecretStr, field_validator |
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
|
|
|
|
|
|
from .secret_manager import SecretManager |
|
|
from .vault_client import VaultConfig |
|
|
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application settings populated from environment variables and ``.env``.

    Field names are matched case-insensitively and unknown variables are
    ignored (see ``model_config``). ``database_url``, ``secret_key`` and
    ``jwt_secret_key`` declare no default, so they must be supplied by the
    environment, the ``.env`` file, or explicit keyword arguments.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # Application
    app_name: str = Field(default="cidadao-ai", description="Application name")
    app_env: str = Field(default="development", description="Environment")
    app_version: str = Field(default="1.0.0", description="Version")
    debug: bool = Field(default=False, description="Debug mode")
    log_level: str = Field(default="INFO", description="Logging level")

    # Server
    host: str = Field(default="0.0.0.0", description="Server host")
    port: int = Field(default=8000, description="Server port")
    workers: int = Field(default=1, description="Number of workers")

    # Database
    database_url: str = Field(
        description="Database connection URL (REQUIRED)"
    )
    database_pool_size: int = Field(default=10, description="DB pool size")
    database_pool_overflow: int = Field(default=20, description="DB pool overflow")
    database_pool_timeout: int = Field(default=30, description="DB pool timeout")

    # Redis
    redis_url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis connection URL"
    )
    redis_password: Optional[SecretStr] = Field(default=None, description="Redis password")
    redis_pool_size: int = Field(default=10, description="Redis pool size")

    # Portal da Transparência API
    transparency_api_key: Optional[SecretStr] = Field(
        default=None,
        description="Portal da Transparência API key"
    )
    transparency_api_base_url: str = Field(
        default="https://api.portaldatransparencia.gov.br",
        description="Portal da Transparência base URL"
    )
    transparency_api_timeout: int = Field(default=30, description="API timeout")
    transparency_api_max_retries: int = Field(default=3, description="Max retries")
    transparency_api_header_key: str = Field(
        default="chave-api-dados",
        description="Portal da Transparência API header key name"
    )

    # Dados.gov.br API
    dados_gov_api_key: Optional[SecretStr] = Field(
        default=None,
        description="Dados.gov.br API key (if required)"
    )

    # LLM
    llm_provider: str = Field(
        default="groq",
        description="LLM provider (groq, together, huggingface)"
    )
    llm_model_name: str = Field(
        default="mixtral-8x7b-32768",
        description="LLM model name"
    )
    llm_temperature: float = Field(default=0.7, description="LLM temperature")
    llm_max_tokens: int = Field(default=2048, description="Max tokens")
    llm_top_p: float = Field(default=0.9, description="Top-p sampling")
    llm_stream: bool = Field(default=True, description="Enable streaming")

    # LLM provider credentials and endpoints
    groq_api_key: Optional[SecretStr] = Field(default=None, description="Groq API key")
    groq_api_base_url: str = Field(
        default="https://api.groq.com/openai/v1",
        description="Groq base URL"
    )

    together_api_key: Optional[SecretStr] = Field(default=None, description="Together API key")
    together_api_base_url: str = Field(
        default="https://api.together.xyz/v1",
        description="Together base URL"
    )

    huggingface_api_key: Optional[SecretStr] = Field(default=None, description="HuggingFace API key")
    huggingface_model_id: str = Field(
        default="mistralai/Mistral-7B-Instruct-v0.2",
        description="HuggingFace model ID"
    )

    # Maritaca AI
    maritaca_api_key: Optional[SecretStr] = Field(default=None, description="Maritaca AI API key")
    maritaca_api_base_url: str = Field(
        default="https://chat.maritaca.ai/api",
        description="Maritaca AI base URL"
    )
    maritaca_model: str = Field(
        default="sabiazinho-3",
        description="Default Maritaca AI model (sabiazinho-3, sabia-3, sabia-3-medium, sabia-3-large)"
    )

    # Vector store and embeddings
    vector_store_type: str = Field(
        default="faiss",
        description="Vector store type (faiss, chromadb)"
    )
    embedding_model: str = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        description="Embedding model"
    )
    embedding_dimension: int = Field(default=384, description="Embedding dimension")
    vector_index_path: Path = Field(
        default=Path("./vector_store/index.faiss"),
        description="Vector index path"
    )

    # ChromaDB
    chroma_persist_directory: Path = Field(
        default=Path("./chroma_db"),
        description="ChromaDB persist directory"
    )
    chroma_collection_name: str = Field(
        default="cidadao_memory",
        description="ChromaDB collection name"
    )

    # Security
    secret_key: SecretStr = Field(
        description="Application secret key (REQUIRED)"
    )
    jwt_secret_key: SecretStr = Field(
        description="JWT secret key (REQUIRED)"
    )
    jwt_algorithm: str = Field(default="HS256", description="JWT algorithm")
    jwt_access_token_expire_minutes: int = Field(default=30, description="Access token expiry")
    jwt_refresh_token_expire_days: int = Field(default=7, description="Refresh token expiry")
    bcrypt_rounds: int = Field(default=12, description="Bcrypt rounds")

    # CORS
    # NOTE(review): the "https://*.vercel.app" / "https://*.hf.space" entries
    # only act as wildcards if the CORS middleware consuming this list does
    # pattern matching on origins - confirm against the middleware in use.
    cors_origins: List[str] = Field(
        default=[
            "http://localhost:3000",
            "http://localhost:3001",
            "http://localhost:8000",
            "http://127.0.0.1:3000",
            "https://cidadao-ai-frontend.vercel.app",
            "https://cidadao-ai.vercel.app",
            "https://*.vercel.app",
            "https://neural-thinker-cidadao-ai-backend.hf.space",
            "https://*.hf.space"
        ],
        description="CORS allowed origins"
    )
    cors_allow_credentials: bool = Field(default=True, description="Allow credentials")
    cors_allow_methods: List[str] = Field(
        default=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"],
        description="Allowed methods"
    )
    cors_allow_headers: List[str] = Field(default=["*"], description="Allowed headers")
    cors_exposed_headers: List[str] = Field(
        default=[
            "X-RateLimit-Limit",
            "X-RateLimit-Remaining",
            "X-RateLimit-Reset",
            "X-Request-ID",
            "X-Total-Count"
        ],
        description="Exposed headers"
    )
    cors_max_age: int = Field(default=3600, description="CORS max age in seconds")

    # Rate limiting
    rate_limit_per_minute: int = Field(default=60, description="Rate limit per minute")
    rate_limit_per_hour: int = Field(default=1000, description="Rate limit per hour")
    rate_limit_per_day: int = Field(default=10000, description="Rate limit per day")

    # IP whitelist
    ip_whitelist_enabled: bool = Field(default=True, description="Enable IP whitelist in production")
    ip_whitelist_strict: bool = Field(default=False, description="Strict mode - reject if IP unknown")
    ip_whitelist_cache_ttl: int = Field(default=300, description="IP whitelist cache TTL seconds")

    # Celery
    celery_broker_url: str = Field(
        default="redis://localhost:6379/1",
        description="Celery broker URL"
    )
    celery_result_backend: str = Field(
        default="redis://localhost:6379/2",
        description="Celery result backend"
    )
    celery_task_serializer: str = Field(default="json", description="Task serializer")
    celery_result_serializer: str = Field(default="json", description="Result serializer")
    celery_accept_content: List[str] = Field(default=["json"], description="Accept content")
    celery_timezone: str = Field(default="America/Sao_Paulo", description="Timezone")
    celery_enable_utc: bool = Field(default=True, description="Enable UTC")

    # Monitoring
    enable_metrics: bool = Field(default=True, description="Enable metrics")
    prometheus_port: int = Field(default=9090, description="Prometheus port")
    grafana_port: int = Field(default=3000, description="Grafana port")

    # OpenTelemetry
    otel_service_name: str = Field(default="cidadao-ai", description="Service name")
    otel_exporter_otlp_endpoint: str = Field(
        default="http://localhost:4317",
        description="OTLP endpoint"
    )
    otel_exporter_otlp_insecure: bool = Field(default=True, description="OTLP insecure")
    otel_traces_exporter: str = Field(default="otlp", description="Traces exporter")
    otel_metrics_exporter: str = Field(default="otlp", description="Metrics exporter")
    otel_logs_exporter: str = Field(default="otlp", description="Logs exporter")

    # Audit logging
    audit_log_enabled: bool = Field(default=True, description="Enable audit logging")
    audit_log_path: Path = Field(
        default=Path("./audit_logs"),
        description="Audit log path"
    )
    audit_log_rotation: str = Field(default="daily", description="Log rotation")
    audit_log_retention_days: int = Field(default=90, description="Log retention days")
    audit_hash_algorithm: str = Field(default="sha256", description="Hash algorithm")

    # Models API
    models_api_enabled: bool = Field(default=True, description="Enable models API")
    models_api_url: str = Field(
        default="https://neural-thinker-cidadao-ai-models.hf.space",
        description="Models API URL"
    )
    models_api_timeout: int = Field(default=30, description="Models API timeout seconds")
    models_fallback_local: bool = Field(default=True, description="Use local ML as fallback")
    models_circuit_breaker_failures: int = Field(default=3, description="Max failures before circuit break")

    # ML tuning
    anomaly_detection_threshold: float = Field(
        default=0.8,
        description="Anomaly detection threshold"
    )
    clustering_min_samples: int = Field(default=5, description="Min clustering samples")
    time_series_seasonality: str = Field(default="yearly", description="Seasonality")
    explainer_max_samples: int = Field(default=100, description="Max explainer samples")

    # Cache
    cache_ttl_seconds: int = Field(default=3600, description="Cache TTL")
    cache_max_size: int = Field(default=1000, description="Max cache size")

    # Response compression
    compression_enabled: bool = Field(default=True, description="Enable response compression")
    compression_min_size: int = Field(default=1024, description="Min size to compress (bytes)")
    compression_gzip_level: int = Field(default=6, description="Gzip compression level (1-9)")
    compression_brotli_quality: int = Field(default=4, description="Brotli quality (0-11)")
    compression_algorithms: List[str] = Field(
        default=["gzip", "br", "deflate"],
        description="Enabled compression algorithms"
    )

    # Feature flags
    enable_fine_tuning: bool = Field(default=False, description="Enable fine-tuning")
    enable_autonomous_crawling: bool = Field(default=False, description="Enable crawling")
    enable_advanced_visualizations: bool = Field(default=False, description="Advanced viz")
    enable_ethics_guard: bool = Field(default=True, description="Enable ethics guard")

    # Development tooling
    enable_debug_toolbar: bool = Field(default=True, description="Debug toolbar")
    enable_sql_echo: bool = Field(default=False, description="SQL echo")
    enable_profiling: bool = Field(default=False, description="Enable profiling")

    @field_validator("app_env")
    @classmethod
    def validate_environment(cls, v: str) -> str:
        """Reject any ``app_env`` outside the known environment names.

        Raises:
            ValueError: if ``v`` is not one of development, staging,
                production or testing (comparison is case-sensitive).
        """
        allowed = ["development", "staging", "production", "testing"]
        if v not in allowed:
            raise ValueError(f"app_env must be one of {allowed}")
        return v

    @field_validator("log_level")
    @classmethod
    def validate_log_level(cls, v: str) -> str:
        """Upper-case ``log_level`` and ensure it is a standard level name.

        Raises:
            ValueError: if the upper-cased value is not DEBUG, INFO,
                WARNING, ERROR or CRITICAL.
        """
        allowed = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
        v = v.upper()
        if v not in allowed:
            raise ValueError(f"log_level must be one of {allowed}")
        return v

    @property
    def is_development(self) -> bool:
        """Return True when ``app_env`` is ``"development"``."""
        return self.app_env == "development"

    @property
    def is_production(self) -> bool:
        """Return True when ``app_env`` is ``"production"``."""
        return self.app_env == "production"

    @property
    def is_testing(self) -> bool:
        """Return True when ``app_env`` is ``"testing"``."""
        return self.app_env == "testing"

    def get_database_url(self, async_mode: bool = True) -> str:
        """Return the database URL, switched to the asyncpg driver if asked.

        Args:
            async_mode: when True and the URL starts with ``postgresql://``,
                the scheme is rewritten to ``postgresql+asyncpg://``.

        Returns:
            The (possibly rewritten) URL. URLs with any other scheme are
            returned unchanged.
        """
        sync_scheme = "postgresql://"
        if async_mode and self.database_url.startswith(sync_scheme):
            # Swap only the leading scheme; an unbounded str.replace would
            # also rewrite the same text appearing later in the URL
            # (for example inside a password or query parameter).
            return "postgresql+asyncpg://" + self.database_url[len(sync_scheme):]
        return self.database_url

    def dict_for_logging(self) -> Dict[str, Any]:
        """Return ``model_dump()`` with every sensitive value masked.

        Returns:
            A dict keyed by field name in which secrets and credential-bearing
            connection URLs are replaced by ``"***REDACTED***"``.
        """
        data = self.model_dump()
        placeholder = "***REDACTED***"

        # Masked regardless of value, including when the value is None.
        # Connection URLs are listed because they can embed a password in
        # the user-info part, exactly like database_url.
        always_masked = (
            "secret_key", "jwt_secret_key", "transparency_api_key",
            "groq_api_key", "together_api_key", "huggingface_api_key",
            "redis_password", "database_url",
            "dados_gov_api_key", "maritaca_api_key",
            "redis_url", "celery_broker_url", "celery_result_backend",
        )
        for name in always_masked:
            if name in data:
                data[name] = placeholder

        # Safety net: any SecretStr field added later is masked even if
        # nobody remembers to extend the tuple above. Only values are
        # reassigned, so mutating while iterating is safe.
        for name, value in data.items():
            if isinstance(value, SecretStr):
                data[name] = placeholder
        return data

    @classmethod
    async def from_vault(cls, vault_config: Optional[VaultConfig] = None) -> "Settings":
        """Build a Settings instance using secrets read through SecretManager.

        Values obtained from the secret manager are passed as explicit
        keyword arguments; every field not supplied that way is resolved by
        the normal settings sources (environment, ``.env``, defaults).

        Args:
            vault_config: connection settings for Vault. When None, one is
                built from the ``VAULT_URL``, ``VAULT_TOKEN``,
                ``VAULT_NAMESPACE``, ``VAULT_SECRET_PATH``,
                ``VAULT_FALLBACK_TO_ENV`` and ``VAULT_REQUIRE`` environment
                variables.

        Returns:
            A Settings instance that keeps a reference to the secret manager
            so it can later be released with ``close_vault_connection``.

        Raises:
            Exception: anything raised while reading secrets or validating
                the settings is re-raised after the secret manager is closed.
        """
        if vault_config is None:
            vault_config = VaultConfig(
                url=os.getenv("VAULT_URL", "http://localhost:8200"),
                token=os.getenv("VAULT_TOKEN"),
                namespace=os.getenv("VAULT_NAMESPACE"),
                secret_path=os.getenv("VAULT_SECRET_PATH", "secret/cidadao-ai"),
                fallback_to_env=os.getenv("VAULT_FALLBACK_TO_ENV", "true").lower() == "true",
                require_vault=os.getenv("VAULT_REQUIRE", "false").lower() == "true"
            )

        secret_manager = SecretManager(vault_config)
        await secret_manager.initialize()

        try:
            database_secrets = await secret_manager.get_secrets_schema("database")
            jwt_secrets = await secret_manager.get_secrets_schema("jwt")
            api_secrets = await secret_manager.get_secrets_schema("api_keys")
            app_secrets = await secret_manager.get_secrets_schema("application")
            redis_secrets = await secret_manager.get_secrets_schema("redis")
            # NOTE(review): the "infrastructure" schema is fetched but none of
            # its values are mapped onto a setting; the lookup is kept so the
            # sequence of secret-manager calls stays the same.
            await secret_manager.get_secrets_schema("infrastructure")

            config_data: Dict[str, Any] = {}

            if app_secrets and app_secrets.secret_key:
                config_data["secret_key"] = app_secrets.secret_key

            if jwt_secrets:
                if jwt_secrets.secret_key:
                    config_data["jwt_secret_key"] = jwt_secrets.secret_key
                # Forward only values that are actually present so a missing
                # one falls back to the field default instead of failing
                # validation with None.
                optional_jwt_values = (
                    ("jwt_algorithm", jwt_secrets.algorithm),
                    ("jwt_access_token_expire_minutes", jwt_secrets.access_token_expire_minutes),
                    ("jwt_refresh_token_expire_days", jwt_secrets.refresh_token_expire_days),
                )
                for field_name, value in optional_jwt_values:
                    if value is not None:
                        config_data[field_name] = value

            if database_secrets and database_secrets.url:
                config_data["database_url"] = database_secrets.url

            if redis_secrets:
                # Guarded like database_secrets.url: an empty URL must not
                # override the default redis_url.
                if redis_secrets.url:
                    config_data["redis_url"] = redis_secrets.url
                if redis_secrets.password:
                    config_data["redis_password"] = redis_secrets.password

            if api_secrets:
                if api_secrets.transparency_api_key:
                    config_data["transparency_api_key"] = api_secrets.transparency_api_key
                if api_secrets.groq_api_key:
                    config_data["groq_api_key"] = api_secrets.groq_api_key
                if api_secrets.together_api_key:
                    config_data["together_api_key"] = api_secrets.together_api_key
                if api_secrets.huggingface_api_key:
                    config_data["huggingface_api_key"] = api_secrets.huggingface_api_key
                # The schema may predate this key, hence the attribute check.
                if hasattr(api_secrets, "dados_gov_api_key") and api_secrets.dados_gov_api_key:
                    config_data["dados_gov_api_key"] = api_secrets.dados_gov_api_key

            settings = cls(**config_data)
        except Exception:
            # Nobody else holds the manager yet, so release it here rather
            # than leaking the connection when loading or validation fails.
            await secret_manager.close()
            raise

        # Kept on the instance so close_vault_connection() can release it.
        settings._secret_manager = secret_manager

        return settings

    async def close_vault_connection(self) -> None:
        """Close the secret manager attached by ``from_vault``, if there is one."""
        if hasattr(self, '_secret_manager') and self._secret_manager:
            await self._secret_manager.close()
|
|
|
|
|
|
|
|
@lru_cache()
def get_settings() -> Settings:
    """Return the process-wide Settings object.

    The function takes no arguments, so the cache holds a single entry:
    the instance built on the first call is handed back on every later call.
    """
    cached_instance = Settings()
    return cached_instance
|
|
|
|
|
|
|
|
async def get_settings_with_vault(vault_config: Optional[VaultConfig] = None) -> Settings:
    """Build a Settings instance through ``Settings.from_vault``.

    Args:
        vault_config: forwarded unchanged to ``Settings.from_vault``;
            None lets that method choose its own configuration.

    Returns:
        The instance produced by ``Settings.from_vault``. The result is not
        cached, so every call awaits a fresh load.
    """
    vault_backed_settings = await Settings.from_vault(vault_config)
    return vault_backed_settings
|
|
|
|
|
|
|
|
|
|
|
# Shared module-level instance, created when this module is first imported.
# Importing the module therefore fails if the settings that have no default
# (database_url, secret_key, jwt_secret_key) cannot be resolved.
settings: Settings = get_settings()