mojaalagevai committed
Commit 61d4f38 · verified · 1 parent: 7d65968

Update app.py

Files changed (1):
  app.py  +26 -20
app.py CHANGED
@@ -1,8 +1,7 @@
 import os
-import sys
 from typing import List, Tuple, Optional
 
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
@@ -50,10 +49,10 @@ for model_info in MODELS_INFO:
     except Exception as e:
         print(f"Error downloading {model_info['filename']}: {e}")
 
-# Available model keys
+# Available model keys (used in API)
 AVAILABLE_MODELS = {
-    "llama": "Dolphin3.0-Llama3.2-1B-Q4_K_M.gguf",
     "qwen": "Dolphin3.0-Qwen2.5-0.5B-Q6_K.gguf",
+    "llama": "Dolphin3.0-Llama3.2-1B-Q4_K_M.gguf",
     "coder": "Qwen2.5-Coder-14B-Instruct-Q6_K.gguf"
 }
 
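Note: the AVAILABLE_MODELS keys above are what clients pass as ChatRequest.model further down. load_model, visible only in the next hunk's context header, presumably resolves a key to its GGUF file and loads it once. A minimal sketch under those assumptions; the cache, the models/ directory, and the Llama parameters are guesses, since the function body is not part of this diff:

    import os
    from llama_cpp import Llama

    _cache = {}  # assumed: load each GGUF at most once

    def load_model(model_key: str) -> Llama:
        # Reject keys that are not in the registry above.
        if model_key not in AVAILABLE_MODELS:
            raise ValueError(f"Unknown model key: {model_key}")
        if model_key not in _cache:
            # "models/" is an assumed download location, not shown in the commit.
            path = os.path.join("models", AVAILABLE_MODELS[model_key])
            _cache[model_key] = Llama(model_path=path, n_ctx=4096)
        return _cache[model_key]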
@@ -86,28 +85,40 @@ def load_model(model_key: str):
 
 
 class ChatRequest(BaseModel):
-    message: str
-    history: List[Tuple[str, str]] = []
-    model: str = "qwen"
-    system_prompt: str = "You are Dolphin, a helpful AI assistant."
-    max_tokens: int = 1024
-    temperature: float = 0.7
-    top_p: float = 0.95
-    top_k: int = 40
-    repeat_penalty: float = 1.1
+    message: str  # Required
+    history: Optional[List[Tuple[str, str]]] = []  # Default: empty list
+    model: Optional[str] = "qwen"  # Default model key
+    system_prompt: Optional[str] = "You are Dolphin, a helpful AI assistant."
+    max_tokens: Optional[int] = 1024
+    temperature: Optional[float] = 0.7
+    top_p: Optional[float] = 0.95
+    top_k: Optional[int] = 40
+    repeat_penalty: Optional[float] = 1.1
 
 
 class ChatResponse(BaseModel):
     response: str
 
 
+class ModelInfoResponse(BaseModel):
+    models: List[str]
+
+
 app = FastAPI(
     title="Dolphin 3.0 LLM API",
     description="REST API for Dolphin 3.0 models using Llama.cpp backend.",
-    version="1.0"
+    version="1.0",
+    docs_url="/docs",  # Only Swagger docs
+    redoc_url=None  # Disable ReDoc
 )
 
 
+@app.get("/models", response_model=ModelInfoResponse)
+def get_available_models():
+    """Returns the list of supported models."""
+    return {"models": list(AVAILABLE_MODELS.keys())}
+
+
 @app.post("/chat", response_model=ChatResponse)
 def chat(request: ChatRequest):
     try:
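Note: after this hunk every ChatRequest field except message is optional, and the new /models route exposes the registry keys. A minimal client sketch, assuming the server listens on localhost:8000 (the port set at the bottom of this diff) and that the requests package is installed:

    import requests  # assumed client-side dependency, not part of app.py

    BASE = "http://localhost:8000"

    # ModelInfoResponse: {"models": ["qwen", "llama", "coder"]}
    print(requests.get(f"{BASE}/models").json())

    # Only "message" is required; every other field falls back to its default.
    resp = requests.post(f"{BASE}/chat", json={"message": "Hello!"})
    print(resp.json()["response"])  # ChatResponse.response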
@@ -150,11 +161,6 @@ def chat(request: ChatRequest):
         raise HTTPException(status_code=500, detail=str(e))
 
 
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to Dolphin 3.0 FastAPI LLM Server!"}
-
-
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
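Note: with the root route gone, GET / now returns 404, so liveness probes should target a surviving route such as /docs (kept via docs_url above) or /models. A quick check under the same localhost:8000 assumption:

    import requests  # assumed, as in the sketch above

    # The Swagger UI still answers; the old "/" welcome route does not.
    assert requests.get("http://localhost:8000/docs").status_code == 200
    assert requests.get("http://localhost:8000/").status_code == 404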
 