Spaces:

neural-thinker
/

cidadao.ai-backend

Paused

cidadao.ai-backend / src /infrastructure /query_cache.py

anderson-ufrj

feat: implement database performance optimization and query caching

ac5724d 3 months ago

10.1 kB

	"""
	Query result caching system for database optimization.

	This module provides intelligent caching of database query results
	to reduce database load and improve response times.
	"""

	import hashlib
	from typing import Any, Dict, List, Optional, Callable, TypeVar, Union
	from datetime import datetime, timedelta
	import asyncio
	from functools import wraps

	from sqlalchemy.sql import Select
	from sqlalchemy.ext.asyncio import AsyncSession

	from src.core import get_logger
	from src.services.cache_service import cache_service
	from src.core.json_utils import dumps, loads

	logger = get_logger(__name__)

	T = TypeVar('T')


	class QueryCache:
	"""
	Intelligent query result caching system.

	Features:
	- Automatic cache key generation
	- Configurable TTL per query type
	- Cache invalidation strategies
	- Performance metrics
	"""

	def __init__(self):
	"""Initialize query cache."""
	self._cache = cache_service
	self._ttl_config = {
	# Table-specific TTLs (in seconds)
	"investigations": 300, # 5 minutes
	"contracts": 3600, # 1 hour
	"users": 1800, # 30 minutes
	"anomalies": 600, # 10 minutes
	"agent_messages": 120, # 2 minutes
	"chat_sessions": 60, # 1 minute
	"default": 300 # 5 minutes default
	}

	# Cache statistics
	self._stats = {
	"hits": 0,
	"misses": 0,
	"invalidations": 0,
	"errors": 0
	}

	def _generate_cache_key(
	self,
	query: Union[str, Select],
	params: Optional[Dict[str, Any]] = None,
	prefix: str = "query"
	) -> str:
	"""Generate a unique cache key for a query."""
	# Convert query to string
	query_str = str(query.compile(compile_kwargs={"literal_binds": True})) if hasattr(query, 'compile') else str(query)

	# Include parameters in key
	if params:
	params_str = dumps(sorted(params.items()))
	else:
	params_str = ""

	# Create hash of query + params
	key_data = f"{query_str}:{params_str}"
	key_hash = hashlib.sha256(key_data.encode()).hexdigest()[:16]

	return f"db:{prefix}:{key_hash}"

	def _get_ttl_for_query(self, query: Union[str, Select]) -> int:
	"""Determine TTL based on query type."""
	query_str = str(query).lower()

	# Check for table names in query
	for table, ttl in self._ttl_config.items():
	if table in query_str:
	return ttl

	return self._ttl_config["default"]

	async def get_or_fetch(
	self,
	query: Union[str, Select],
	fetch_func: Callable,
	params: Optional[Dict[str, Any]] = None,
	ttl: Optional[int] = None,
	prefix: str = "query"
	) -> Any:
	"""
	Get query result from cache or fetch from database.

	Args:
	query: SQL query
	fetch_func: Async function to fetch data if not cached
	params: Query parameters
	ttl: Cache TTL (auto-determined if not provided)
	prefix: Cache key prefix

	Returns:
	Query result
	"""
	# Generate cache key
	cache_key = self._generate_cache_key(query, params, prefix)

	# Try to get from cache
	cached_result = await self._cache.get(cache_key)
	if cached_result is not None:
	self._stats["hits"] += 1
	logger.debug(f"Cache hit for query: {cache_key}")
	return cached_result

	# Cache miss - fetch from database
	self._stats["misses"] += 1
	logger.debug(f"Cache miss for query: {cache_key}")

	try:
	# Fetch data
	result = await fetch_func()

	# Determine TTL
	if ttl is None:
	ttl = self._get_ttl_for_query(query)

	# Cache the result
	await self._cache.set(
	cache_key,
	result,
	ttl=ttl,
	compress=len(dumps(result)) > 1024 # Compress if > 1KB
	)

	return result

	except Exception as e:
	self._stats["errors"] += 1
	logger.error(f"Error in cache fetch: {e}")
	raise

	async def invalidate(
	self,
	pattern: Optional[str] = None,
	table: Optional[str] = None,
	prefix: str = "query"
	):
	"""
	Invalidate cached queries.

	Args:
	pattern: Pattern to match cache keys
	table: Table name to invalidate
	prefix: Cache key prefix
	"""
	self._stats["invalidations"] += 1

	if pattern:
	# Invalidate by pattern
	invalidated = await self._invalidate_by_pattern(f"db:{prefix}:{pattern}*")
	logger.info(f"Invalidated {invalidated} cache entries matching pattern: {pattern}")

	elif table:
	# Invalidate all queries for a table
	invalidated = await self._invalidate_by_pattern(f"db:{table}")
	logger.info(f"Invalidated {invalidated} cache entries for table: {table}")

	else:
	# Invalidate all query cache
	invalidated = await self._invalidate_by_pattern(f"db:{prefix}:*")
	logger.info(f"Invalidated {invalidated} cache entries with prefix: {prefix}")

	async def _invalidate_by_pattern(self, pattern: str) -> int:
	"""Invalidate cache entries matching a pattern."""
	# Note: This is a simplified implementation
	# In production, use Redis SCAN to find matching keys
	count = 0

	try:
	# For now, we'll track invalidations
	logger.debug(f"Invalidating cache pattern: {pattern}")
	count = 1 # Placeholder
	except Exception as e:
	logger.error(f"Error invalidating cache: {e}")

	return count

	def get_stats(self) -> Dict[str, Any]:
	"""Get cache statistics."""
	total_requests = self._stats["hits"] + self._stats["misses"]
	hit_rate = (
	self._stats["hits"] / total_requests
	if total_requests > 0 else 0
	)

	return {
	**self._stats,
	"total_requests": total_requests,
	"hit_rate": hit_rate
	}


	# Global query cache instance
	query_cache = QueryCache()


	def cached_query(
	ttl: Optional[int] = None,
	key_prefix: str = "query",
	invalidate_on: Optional[List[str]] = None
	):
	"""
	Decorator for caching database queries.

	Args:
	ttl: Cache TTL in seconds
	key_prefix: Prefix for cache key
	invalidate_on: List of table names that invalidate this cache

	Example:
	@cached_query(ttl=300, invalidate_on=["users"])
	async def get_user_by_id(session: AsyncSession, user_id: int):
	result = await session.execute(
	select(User).where(User.id == user_id)
	)
	return result.scalar_one_or_none()
	"""
	def decorator(func: Callable) -> Callable:
	@wraps(func)
	async def wrapper(args, *kwargs):
	# Extract session and create a cache key from function name and args
	session = None
	for arg in args:
	if isinstance(arg, AsyncSession):
	session = arg
	break

	# Generate cache key from function and arguments
	cache_key_parts = [
	func.__name__,
	*[str(arg) for arg in args if not isinstance(arg, AsyncSession)],
	*[f"{k}={v}" for k, v in sorted(kwargs.items())]
	]
	cache_key = ":".join(cache_key_parts)

	# Use query cache
	async def fetch_func():
	return await func(args, *kwargs)

	return await query_cache.get_or_fetch(
	query=cache_key, # Use function signature as "query"
	fetch_func=fetch_func,
	ttl=ttl,
	prefix=key_prefix
	)

	# Store invalidation configuration
	if invalidate_on:
	wrapper._invalidate_on = invalidate_on

	return wrapper
	return decorator


	class CachedRepository:
	"""
	Base repository class with built-in caching support.

	Example:
	class UserRepository(CachedRepository):
	def __init__(self, session: AsyncSession):
	super().__init__(session, "users")

	@cached_query(ttl=1800)
	async def get_by_id(self, user_id: int):
	# Implementation
	"""

	def __init__(self, session: AsyncSession, table_name: str):
	"""
	Initialize cached repository.

	Args:
	session: Database session
	table_name: Name of the table for cache invalidation
	"""
	self.session = session
	self.table_name = table_name
	self._cache = query_cache

	async def invalidate_cache(self, pattern: Optional[str] = None):
	"""Invalidate cache for this repository."""
	await self._cache.invalidate(
	table=self.table_name if not pattern else None,
	pattern=pattern
	)

	async def after_insert(self, entity: Any):
	"""Hook called after insert - invalidates relevant cache."""
	await self.invalidate_cache()

	async def after_update(self, entity: Any):
	"""Hook called after update - invalidates relevant cache."""
	await self.invalidate_cache()

	async def after_delete(self, entity: Any):
	"""Hook called after delete - invalidates relevant cache."""
	await self.invalidate_cache()