Spaces:

mojaalagevai
/

llmapi2

Running

App Files Files Community

mojaalagevai committed 29 days ago

Commit

61d4f38

verified ·

1 Parent(s): 7d65968

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -20

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
 import os
-import sys
 from typing import List, Tuple, Optional
-from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
@@ -50,10 +49,10 @@ for model_info in MODELS_INFO:
         except Exception as e:
             print(f"Error downloading {model_info['filename']}: {e}")
-# Available model keys
 AVAILABLE_MODELS = {
-    "llama": "Dolphin3.0-Llama3.2-1B-Q4_K_M.gguf",
     "qwen": "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",
     "coder": "Qwen2.5-Coder-14B-Instruct-Q6_K.gguf"
 }
@@ -86,28 +85,40 @@ def load_model(model_key: str):
 class ChatRequest(BaseModel):
-    message: str
-    history: List[Tuple[str, str]] = []
-    model: str = "qwen"
-    system_prompt: str = "You are Dolphin, a helpful AI assistant."
-    max_tokens: int = 1024
-    temperature: float = 0.7
-    top_p: float = 0.95
-    top_k: int = 40
-    repeat_penalty: float = 1.1
 class ChatResponse(BaseModel):
     response: str
 app = FastAPI(
     title="Dolphin 3.0 LLM API",
     description="REST API for Dolphin 3.0 models using Llama.cpp backend.",
-    version="1.0"
 )
 @app.post("/chat", response_model=ChatResponse)
 def chat(request: ChatRequest):
     try:
@@ -150,11 +161,6 @@ def chat(request: ChatRequest):
         raise HTTPException(status_code=500, detail=str(e))
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to Dolphin 3.0 FastAPI LLM Server!"}
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 from typing import List, Tuple, Optional
+from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
         except Exception as e:
             print(f"Error downloading {model_info['filename']}: {e}")
+# Available model keys (used in API)
 AVAILABLE_MODELS = {
     "qwen": "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",
+    "llama": "Dolphin3.0-Llama3.2-1B-Q4_K_M.gguf",
     "coder": "Qwen2.5-Coder-14B-Instruct-Q6_K.gguf"
 }
 class ChatRequest(BaseModel):
+    message: str  # Required
+    history: Optional[List[Tuple[str, str]]] = []  # Default: empty list
+    model: Optional[str] = "qwen"  # Default model key
+    system_prompt: Optional[str] = "You are Dolphin, a helpful AI assistant."
+    max_tokens: Optional[int] = 1024
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.95
+    top_k: Optional[int] = 40
+    repeat_penalty: Optional[float] = 1.1
 class ChatResponse(BaseModel):
     response: str
+class ModelInfoResponse(BaseModel):
+    models: List[str]
 app = FastAPI(
     title="Dolphin 3.0 LLM API",
     description="REST API for Dolphin 3.0 models using Llama.cpp backend.",
+    version="1.0",
+    docs_url="/docs",  # Only Swagger docs
+    redoc_url=None    # Disable ReDoc
 )
+@app.get("/models", response_model=ModelInfoResponse)
+def get_available_models():
+    """Returns the list of supported models."""
+    return {"models": list(AVAILABLE_MODELS.keys())}
 @app.post("/chat", response_model=ChatResponse)
 def chat(request: ChatRequest):
     try:
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)