Spaces:

chryzxc
/

portfolio-inference-provider

Sleeping

App Files Files Community

chryzxc committed on May 16

Commit

0d7d4cd

verified ·

1 Parent(s): 0e2d401

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -28

app.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from fastapi import FastAPI, HTTPException, Request
 from onnxruntime import InferenceSession
 from transformers import AutoTokenizer
 import numpy as np
-import os
 import uvicorn
 app = FastAPI()
@@ -15,21 +15,23 @@ tokenizer = AutoTokenizer.from_pretrained(
 )
 # Load ONNX model
-try:
-    session = InferenceSession("model.onnx")
-    print("Model loaded successfully")
-except Exception as e:
-    print(f"Failed to load model: {str(e)}")
-    raise
-@app.get("/")
-def health_check():
-    return {"status": "OK", "model": "ONNX"}
 @app.post("/api/predict")
 async def predict(request: Request):
     try:
-        # Get JSON input
         data = await request.json()
         text = data.get("text", "")
@@ -45,30 +47,22 @@ async def predict(request: Request):
             max_length=32
         )
-        # Prepare ONNX inputs with correct shapes
-        onnx_inputs = {
             "input_ids": inputs["input_ids"].astype(np.int64),
             "attention_mask": inputs["attention_mask"].astype(np.int64)
-        }
-        # Run inference
-        outputs = session.run(None, onnx_inputs)
-        # Convert outputs to list and handle numpy types
-        embedding = outputs[0][0].astype(float).tolist()  # First output, first batch
-        return {
-            "embedding": embedding,
             "tokens": tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
-    uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=7860,
-        reload=False
-    )

 from fastapi import FastAPI, HTTPException, Request
+from fastapi.encoders import jsonable_encoder
 from onnxruntime import InferenceSession
 from transformers import AutoTokenizer
 import numpy as np
 import uvicorn
 app = FastAPI()
 )
 # Load ONNX model
+session = InferenceSession("model.onnx")
def convert_output(value):
    """Recursively convert numpy values into JSON-friendly Python objects.

    Args:
        value: A numpy scalar, a numpy array, a list/tuple/dict that may
            contain such values at any depth, or any other object.

    Returns:
        * numpy scalars and 0-d arrays -> a native Python scalar
          (bool/int/float data is normalised to ``float``);
        * numpy arrays with one or more dimensions -> nested lists that
          mirror the array's shape, including one-element arrays, so the
          shape of the result never depends on how many items it holds;
        * lists and tuples -> a list with every item converted;
        * dicts -> a dict with the same keys and converted values;
        * anything else -> returned unchanged.
    """
    if isinstance(value, (np.generic, np.ndarray)):
        arr = np.asarray(value)
        # Only bool/int/uint/float data can be cast to float safely; string
        # or object data would raise, so it is left in its native form.
        if arr.dtype.kind in "biuf":
            arr = arr.astype(float)
        # tolist() yields a bare scalar for 0-d input and nested lists for
        # everything else, which keeps (1,) and (1, 1) arrays as lists.
        return arr.tolist()
    if isinstance(value, (list, tuple)):
        # JSON has no tuple type, so tuples are emitted as lists too.
        return [convert_output(item) for item in value]
    if isinstance(value, dict):
        return {key: convert_output(item) for key, item in value.items()}
    return value
 @app.post("/api/predict")
 async def predict(request: Request):
     try:
         data = await request.json()
         text = data.get("text", "")
             max_length=32
         )
+        # Run model
+        outputs = session.run(None, {
             "input_ids": inputs["input_ids"].astype(np.int64),
             "attention_mask": inputs["attention_mask"].astype(np.int64)
+        })
+        # Prepare response with converted types
+        response = {
+            "embedding": convert_output(outputs[0]),  # Process main output
             "tokens": tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
         }
+        return jsonable_encoder(response)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)