"""Embedding client for text and image embeddings. |
|
|
|
|
|
Supports: |
|
|
- Text: Google text-embedding-004 (768-dim) |
|
|
- Image: HuggingFace CLIP/SigLIP (512/768-dim) |
|
|
""" |
|
|
|
|
|
import httpx |
|
|
from io import BytesIO |
|
|
from google import genai |
|
|
|
|
|
from app.core.config import settings |
|
|
|
|
|
|
|
|
client = genai.Client(api_key=settings.google_api_key) |
|
|
|
|
|
|
|
|
class EmbeddingClient:
    """Client for generating text and image embeddings."""

    def __init__(self):
        """Initialize embedding client."""
        self.text_model = settings.embedding_model
        self.hf_api_key = settings.huggingface_api_key

    async def embed_text(self, text: str) -> list[float]:
        """
        Generate text embedding using text-embedding-004.

        Args:
            text: Text to embed

        Returns:
            768-dimensional embedding vector
        """
        # Use the SDK's async surface (client.aio) so the event loop
        # is not blocked while the request is in flight.
        response = await client.aio.models.embed_content(
            model=self.text_model,
            contents=text,
        )
        return response.embeddings[0].values

    async def embed_texts(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for multiple texts in a single request.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors, in the same order as the input
        """
        response = await client.aio.models.embed_content(
            model=self.text_model,
            contents=texts,
        )
        return [emb.values for emb in response.embeddings]

    async def embed_image(self, image_url: str) -> list[float] | None:
        """
        Generate image embedding using CLIP via the HuggingFace Inference API.

        Args:
            image_url: URL of the image

        Returns:
            512-dimensional embedding vector, or None if the request failed
            or no HuggingFace API key is configured
        """
        if not self.hf_api_key:
            return None

        try:
            async with httpx.AsyncClient() as http_client:
                response = await http_client.post(
                    "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels%2Fopenai%2Fclip-vit-base-patch32%26quot%3B%2C
                    headers={"Authorization": f"Bearer {self.hf_api_key}"},
                    json={"inputs": {"image": image_url}},
                    timeout=30.0,
                )
                if response.status_code == 200:
                    # The feature-extraction endpoint is expected to return
                    # the embedding as a JSON array of floats.
                    return response.json()
                return None
        except Exception:
            return None

    async def embed_image_bytes(self, image_bytes: bytes) -> list[float] | None:
        """
        Generate image embedding from raw image bytes.

        Args:
            image_bytes: Raw image bytes (JPEG, PNG, etc.)

        Returns:
            512-dimensional embedding vector, or None if the request failed
            or no HuggingFace API key is configured
        """
        if not self.hf_api_key:
            return None

        try:
            # Encode the bytes as a base64 data URL so they can be sent
            # in the same JSON payload shape as embed_image.
            b64_image = base64.b64encode(image_bytes).decode("utf-8")
            data_url = f"data:image/jpeg;base64,{b64_image}"

            async with httpx.AsyncClient() as http_client:
                response = await http_client.post(
                    "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels%2Fopenai%2Fclip-vit-base-patch32%26quot%3B%2C
                    headers={"Authorization": f"Bearer {self.hf_api_key}"},
                    json={"inputs": {"image": data_url}},
                    timeout=30.0,
                )
                if response.status_code == 200:
                    return response.json()
                return None
        except Exception:
            return None


embedding_client = EmbeddingClient() |
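

# Minimal usage sketch (not part of the public API): assumes settings carries
# a valid Google API key, and optionally a HuggingFace key for the image path.
# The sample text and image URL below are purely illustrative.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        vec = await embedding_client.embed_text("hello world")
        print(f"text embedding dims: {len(vec)}")  # expected: 768

        img_vec = await embedding_client.embed_image("/static-proxy?url=https%3A%2F%2Fexample.com%2Fcat.jpg%26quot%3B)
        print(f"image embedding dims: {len(img_vec) if img_vec else 'unavailable'}")

    asyncio.run(_demo())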