import os
from pathlib import Path

# Define the cache directory path within your project and create it if it doesn't exist.
cache_dir = str(Path(__file__).parent.resolve() / "cache")
os.makedirs(cache_dir, exist_ok=True)

# Set the TRANSFORMERS_CACHE environment variable *before* importing transformers:
# the library reads it at import time, so setting it afterwards has no effect.
os.environ["TRANSFORMERS_CACHE"] = cache_dir
print(f"Transformers cache directory: {cache_dir}")

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()

# Ensure your Hugging Face token is set as an environment variable.
huggingface_token = os.environ.get("TOKEN")
if not huggingface_token:
    raise ValueError("TOKEN environment variable is not set.")

# Load the tokenizer and model using Hugging Face's library with the token.
# cache_dir is also passed explicitly so the cache location does not depend
# on environment-variable timing.
try:
    tokenizer = AutoTokenizer.from_pretrained(
        "google/gemma-2-2b-it", token=huggingface_token, cache_dir=cache_dir
    )
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2-2b-it", token=huggingface_token, cache_dir=cache_dir
    )

    # Initialize the pipeline on GPU if one is available, otherwise fall back to CPU.
    generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=0 if torch.cuda.is_available() else -1,
    )
except Exception as e:
    raise RuntimeError(f"Failed to load model: {e}")


# Data model for the request body.
class Item(BaseModel):
    prompt: str
    temperature: float = 0.7
    max_new_tokens: int = 128


# Endpoint for generating text. The empty-prompt check sits outside the
# try block so the 400 it raises isn't swallowed and re-raised as a 500.
@app.post("/")
async def generate_text(item: Item):
    if not item.prompt:
        raise HTTPException(status_code=400, detail="`prompt` field is required")
    try:
        output = generator(
            item.prompt,
            do_sample=True,  # required for `temperature` to take effect
            temperature=item.temperature,
            max_new_tokens=item.max_new_tokens,
        )
        return {"generated_text": output[0]["generated_text"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
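
# --- Example client (a minimal sketch; assumes the server above is running
# locally on port 8000 and that the third-party `requests` package is
# installed — neither is guaranteed by this script itself) ---
#
# import requests
#
# response = requests.post(
#     "http://localhost:8000/",
#     json={
#         "prompt": "Write a haiku about the sea.",
#         "temperature": 0.7,
#         "max_new_tokens": 64,
#     },
# )
# response.raise_for_status()
# print(response.json()["generated_text"])
#
# Or equivalently from the command line:
#
# curl -X POST http://localhost:8000/ \
#   -H "Content-Type: application/json" \
#   -d '{"prompt": "Write a haiku about the sea."}'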