first app version

- .beamignore +28 -0
- .gitattributes +0 -35
- .gitignore +8 -0
- README.md +1 -0
- app.py +124 -0
- index.html +0 -19
- style.css +0 -28
- utils.py +36 -0
.beamignore
ADDED
@@ -0,0 +1,28 @@
+# Generated by Beam SDK
+.beamignore
+pyproject.toml
+.git
+.idea
+.python-version
+.vscode
+.venv
+venv
+__pycache__
+.DS_Store
+.config
+drive/MyDrive
+.coverage
+.pytest_cache
+.ipynb
+.ruff_cache
+.dockerignore
+.ipynb_checkpoints
+.env.local
+.envrc
+**/__pycache__/
+**/.pytest_cache/
+**/node_modules/
+**/.venv/
+*.pyc
+.next/
+.circleci
.gitattributes
DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,8 @@
+.DS_Store
+__pycache__
+__downloads__
+.env
+.venv/
+.vscode/launch.json
+.secrets
+.idea/
README.md
CHANGED
@@ -4,6 +4,7 @@ emoji: 🚀
 colorFrom: indigo
 colorTo: pink
 sdk: static
+app_file: README.md
 pinned: false
 ---
 
app.py
ADDED
@@ -0,0 +1,124 @@
+from threading import Thread
+
+import torch
+from beam import Image, Volume, GpuType, asgi
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from transformers import (
+    AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer,
+    PreTrainedTokenizerFast, PreTrainedModel, StoppingCriteriaList
+)
+
+from utils import MaxPostsStoppingCriteria, Body, fallback
+
+SETTINGS = {
+    "model_name": "Error410/JVCGPT-Medium",
+    "beam_volume_path": "./cached_models",
+}
+
+# @see https://huggingface.co/docs/transformers/generation_strategies#customize-text-generation
+DEFAULTS = {
+    "max_length": 2048,  # 512
+    "temperature": 0.9,  # 1
+    "top_p": 1,  # 0.95
+    "top_k": 0,  # 40
+    "repetition_penalty": 1.0,  # 1.0
+    "no_repeat_ngram_size": 0,  # 0
+    "do_sample": True,  # True
+}
+
+
+def load_models():
+    tokenizer = AutoTokenizer.from_pretrained(
+        SETTINGS["model_name"],
+        cache_dir=SETTINGS["beam_volume_path"]
+    )
+    tokenizer.pad_token = tokenizer.eos_token
+    model = AutoModelForCausalLM.from_pretrained(
+        SETTINGS["model_name"],
+        device_map="auto",
+        torch_dtype=torch.float16,
+        cache_dir=SETTINGS["beam_volume_path"],
+    )
+    return model, tokenizer
+
+
+def stream(model: PreTrainedModel, tokenizer: PreTrainedTokenizerFast, body: Body):
+    generate_args = {
+        "max_length": fallback(body.max_length, DEFAULTS["max_length"]),
+        "temperature": fallback(body.temperature, DEFAULTS["temperature"]),
+        "top_p": fallback(body.top_p, DEFAULTS["top_p"]),
+        "top_k": fallback(body.top_k, DEFAULTS["top_k"]),
+        "repetition_penalty": fallback(body.repetition_penalty, DEFAULTS["repetition_penalty"]),
+        "no_repeat_ngram_size": fallback(body.no_repeat_ngram_size, DEFAULTS["no_repeat_ngram_size"]),
+        "do_sample": fallback(body.do_sample, DEFAULTS["do_sample"]),
+        "use_cache": True,
+        "eos_token_id": tokenizer.eos_token_id,
+        "pad_token_id": tokenizer.pad_token_id,
+    }
+
+    inputs = tokenizer(body.prompt, return_tensors="pt", padding=True)
+    input_ids = inputs["input_ids"].to("cuda")
+    attention_mask = inputs["attention_mask"].to("cuda")
+
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False, timeout=240)
+
+    # with torch.no_grad(): # seems to be useless
+    thread = Thread(
+        target=model.generate,
+        kwargs={
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "streamer": streamer,
+            "stopping_criteria": StoppingCriteriaList([MaxPostsStoppingCriteria(tokenizer, body.posts_count)]),
+            **generate_args,
+        }
+    )
+    thread.start()
+
+    for token in streamer:
+        yield token
+        # if len(token) > 0:
+        #     yield f"DATA {token}"
+        #
+        # yield "EOS"


+@asgi(
+    on_start=load_models,
+    cpu=2.0,
+    memory="16Gi",
+    gpu=GpuType.A100_40,
+    gpu_count=1,
+    timeout=900,  # Time to load the model and start the server
+    image=Image(
+        python_version="python3.12",
+        python_packages=[
+            "fastapi",
+            "torch",
+            "transformers",
+            "accelerate",
+            "huggingface_hub[hf-transfer]",
+        ],
+        env_vars=["HF_HUB_ENABLE_HF_TRANSFER=1"],
+    ),
+    volumes=[
+        Volume(
+            name="cached_models",
+            mount_path=SETTINGS["beam_volume_path"],
+        )
+    ],
+)
+def server(context):
+    model, tokenizer = context.on_start_value
+    app = FastAPI()
+
+    @app.post("/stream")
+    async def stream_endpoint(body: Body) -> StreamingResponse:
+        return StreamingResponse(
+            stream(model, tokenizer, body),
+            media_type='text/event-stream',
+            headers={"Cache-Control": "no-cache"},
+        )
+
+    return app
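For context, a minimal client sketch for the /stream endpoint added above might look like the following. The deployment URL and auth token are placeholders (Beam assigns the real URL at deploy time), and the prompt and field values are purely illustrative; only the request field names come from the Body model in utils.py.

import requests

# Placeholder endpoint and token: substitute the values Beam gives you at deploy time.
URL = "https://<your-beam-deployment>/stream"
TOKEN = "<your-beam-auth-token>"

payload = {
    "prompt": "Bonjour",  # illustrative; the model's expected prompt format may differ
    "posts_count": 3,     # stop generation after three <|end_of_post|> markers
    "temperature": 0.9,   # optional override; omitted fields fall back to DEFAULTS server-side
}

with requests.post(
    URL,
    json=payload,
    headers={"Authorization": f"Bearer {TOKEN}"},
    stream=True,
) as response:
    response.raise_for_status()
    # Chunks arrive incrementally because the server yields from TextIteratorStreamer.
    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)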
index.html
DELETED
@@ -1,19 +0,0 @@
-<!doctype html>
-<html>
-<head>
-    <meta charset="utf-8" />
-    <meta name="viewport" content="width=device-width" />
-    <title>My static Space</title>
-    <link rel="stylesheet" href="style.css" />
-</head>
-<body>
-    <div class="card">
-        <h1>Welcome to your static Space!</h1>
-        <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-        <p>
-            Also don't forget to check the
-            <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-        </p>
-    </div>
-</body>
-</html>
style.css
DELETED
@@ -1,28 +0,0 @@
-body {
-    padding: 2rem;
-    font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
-}
-
-h1 {
-    font-size: 16px;
-    margin-top: 0;
-}
-
-p {
-    color: rgb(107, 114, 128);
-    font-size: 15px;
-    margin-bottom: 10px;
-    margin-top: 5px;
-}
-
-.card {
-    max-width: 620px;
-    margin: 0 auto;
-    padding: 16px;
-    border: 1px solid lightgray;
-    border-radius: 16px;
-}
-
-.card p:last-child {
-    margin-bottom: 0;
-}
utils.py
ADDED
@@ -0,0 +1,36 @@
+from pydantic import BaseModel
+from transformers import (PreTrainedTokenizerFast, StoppingCriteria)
+
+
+def fallback(value, fallback_value):
+    if value is None:
+        return fallback_value
+    return value
+
+
+class Body(BaseModel):
+    prompt: str
+    posts_count: int
+    max_length: int | None = None
+    temperature: float | None = None
+    top_p: float | None = None
+    top_k: float | None = None
+    repetition_penalty: float | None = None
+    no_repeat_ngram_size: float | None = None
+    do_sample: bool | None = None
+
+
+class MaxPostsStoppingCriteria(StoppingCriteria):
+    def __init__(self, tokenizer: PreTrainedTokenizerFast, posts_count: int):
+        self.end_of_post_token_id = tokenizer.encode("<|end_of_post|>", add_special_tokens=False)
+        self.posts_count = posts_count
+        self.counter = 0
+
+    def __call__(self, input_ids, scores, **kwargs):
+        # Check whether a sequence now ends with the <|end_of_post|> token id sequence
+        for sequence in input_ids:
+            if sequence[-len(self.end_of_post_token_id):].tolist() == self.end_of_post_token_id:
+                self.counter += 1
+                if self.counter >= self.posts_count:
+                    return True
+        return False
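As a quick sanity check, MaxPostsStoppingCriteria can be exercised without downloading the real model. The stub tokenizer and token id below are made up for the test; the only requirement is an encode() method that returns the id sequence for <|end_of_post|>.

import torch

from utils import MaxPostsStoppingCriteria


class StubTokenizer:
    # Hypothetical stand-in: maps <|end_of_post|> to a single made-up token id.
    def encode(self, text, add_special_tokens=False):
        return [50000]


criteria = MaxPostsStoppingCriteria(StubTokenizer(), posts_count=2)

# No marker at the end of the sequence yet: generation continues.
assert criteria(torch.tensor([[1, 2, 3]]), scores=None) is False
# First <|end_of_post|>: the counter goes to 1, still below posts_count.
assert criteria(torch.tensor([[1, 2, 50000]]), scores=None) is False
# Second marker: the counter reaches posts_count, so generation stops.
assert criteria(torch.tensor([[4, 5, 50000]]), scores=None) is True

The counter persists across calls, which matches how generate() invokes the criterion once per decoding step: a marker is only at the very end of the sequence on the step it was emitted, so each post is counted once.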