Greums committed on
Commit 06126dc · 1 Parent(s): 3a196db

first app version

Files changed (8)
  1. .beamignore +28 -0
  2. .gitattributes +0 -35
  3. .gitignore +8 -0
  4. README.md +1 -0
  5. app.py +124 -0
  6. index.html +0 -19
  7. style.css +0 -28
  8. utils.py +36 -0
.beamignore ADDED
@@ -0,0 +1,28 @@
+ # Generated by Beam SDK
+ .beamignore
+ pyproject.toml
+ .git
+ .idea
+ .python-version
+ .vscode
+ .venv
+ venv
+ __pycache__
+ .DS_Store
+ .config
+ drive/MyDrive
+ .coverage
+ .pytest_cache
+ .ipynb
+ .ruff_cache
+ .dockerignore
+ .ipynb_checkpoints
+ .env.local
+ .envrc
+ **/__pycache__/
+ **/.pytest_cache/
+ **/node_modules/
+ **/.venv/
+ *.pyc
+ .next/
+ .circleci
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
+ .DS_Store
+ __pycache__
+ __downloads__
+ .env
+ .venv/
+ .vscode/launch.json
+ .secrets
+ .idea/
README.md CHANGED
@@ -4,6 +4,7 @@ emoji: 🚀
  colorFrom: indigo
  colorTo: pink
  sdk: static
+ app_file: README.md
  pinned: false
  ---
app.py ADDED
@@ -0,0 +1,124 @@
+ from threading import Thread
+
+ import torch
+ from beam import Image, Volume, GpuType, asgi
+ from fastapi import FastAPI
+ from fastapi.responses import StreamingResponse
+ from transformers import (
+     AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer,
+     PreTrainedTokenizerFast, PreTrainedModel, StoppingCriteriaList
+ )
+
+ from utils import MaxPostsStoppingCriteria, Body, fallback
+
+ SETTINGS = {
+     "model_name": "Error410/JVCGPT-Medium",
+     "beam_volume_path": "./cached_models",
+ }
+
+ # @see https://huggingface.co/docs/transformers/generation_strategies#customize-text-generation
+ DEFAULTS = {
+     "max_length": 2048,  # library default: 512
+     "temperature": 0.9,  # library default: 1
+     "top_p": 1,  # library default: 0.95
+     "top_k": 0,  # library default: 40
+     "repetition_penalty": 1.0,  # library default: 1.0
+     "no_repeat_ngram_size": 0,  # library default: 0
+     "do_sample": True,  # library default: True
+ }
+
+
+ def load_models():
+     # Download the tokenizer and weights, or reuse them from the Beam volume.
+     tokenizer = AutoTokenizer.from_pretrained(
+         SETTINGS["model_name"],
+         cache_dir=SETTINGS["beam_volume_path"]
+     )
+     tokenizer.pad_token = tokenizer.eos_token
+     model = AutoModelForCausalLM.from_pretrained(
+         SETTINGS["model_name"],
+         device_map="auto",
+         torch_dtype=torch.float16,
+         cache_dir=SETTINGS["beam_volume_path"],
+     )
+     return model, tokenizer
+
+
+ def stream(model: PreTrainedModel, tokenizer: PreTrainedTokenizerFast, body: Body):
+     # Per-request values win; unset fields fall back to DEFAULTS.
+     generate_args = {
+         "max_length": fallback(body.max_length, DEFAULTS["max_length"]),
+         "temperature": fallback(body.temperature, DEFAULTS["temperature"]),
+         "top_p": fallback(body.top_p, DEFAULTS["top_p"]),
+         "top_k": fallback(body.top_k, DEFAULTS["top_k"]),
+         "repetition_penalty": fallback(body.repetition_penalty, DEFAULTS["repetition_penalty"]),
+         "no_repeat_ngram_size": fallback(body.no_repeat_ngram_size, DEFAULTS["no_repeat_ngram_size"]),
+         "do_sample": fallback(body.do_sample, DEFAULTS["do_sample"]),
+         "use_cache": True,
+         "eos_token_id": tokenizer.eos_token_id,
+         "pad_token_id": tokenizer.pad_token_id,
+     }
+
+     inputs = tokenizer(body.prompt, return_tensors="pt", padding=True)
+     input_ids = inputs["input_ids"].to("cuda")
+     attention_mask = inputs["attention_mask"].to("cuda")
+
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False, timeout=240)
+
+     # No torch.no_grad() needed: model.generate() already runs without gradients.
+     # Generation runs in a background thread so tokens can be yielded as they arrive.
+     thread = Thread(
+         target=model.generate,
+         kwargs={
+             "input_ids": input_ids,
+             "attention_mask": attention_mask,
+             "streamer": streamer,
+             "stopping_criteria": StoppingCriteriaList([MaxPostsStoppingCriteria(tokenizer, body.posts_count)]),
+             **generate_args,
+         }
+     )
+     thread.start()
+
+     for token in streamer:
+         yield token
+         # A possible SSE-style framing, currently unused:
+         # if len(token) > 0:
+         #     yield f"DATA {token}"
+         # yield "EOS"
+
+
+ @asgi(
+     on_start=load_models,
+     cpu=2.0,
+     memory="16Gi",
+     gpu=GpuType.A100_40,
+     gpu_count=1,
+     timeout=900,  # time to load the model and start the server
+     image=Image(
+         python_version="python3.12",
+         python_packages=[
+             "fastapi",
+             "torch",
+             "transformers",
+             "accelerate",
+             "huggingface_hub[hf-transfer]",
+         ],
+         env_vars=["HF_HUB_ENABLE_HF_TRANSFER=1"],
+     ),
+     volumes=[
+         Volume(
+             name="cached_models",
+             mount_path=SETTINGS["beam_volume_path"],
+         )
+     ],
+ )
+ def server(context):
+     model, tokenizer = context.on_start_value
+     app = FastAPI()
+
+     @app.post("/stream")
+     async def stream_endpoint(body: Body) -> StreamingResponse:
+         return StreamingResponse(
+             stream(model, tokenizer, body),
+             media_type='text/event-stream',
+             headers={"Cache-Control": "no-cache"},
+         )
+
+     return app
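
For quick testing, a minimal client sketch for the new `/stream` endpoint follows. The deployment URL, auth header, and prompt are placeholders (assumptions), not values from this commit; the endpoint streams raw token text, so chunks can simply be concatenated:

```python
import requests

# Hypothetical deployment URL and token; substitute your Beam values.
BASE_URL = "https://<your-beam-deployment>.app.beam.cloud"

payload = {
    "prompt": "Hello",   # prompt format depends on how JVCGPT was trained
    "posts_count": 2,    # stop after two <|end_of_post|> markers
    "temperature": 0.8,  # optional; omitted fields fall back to DEFAULTS
}

with requests.post(
    f"{BASE_URL}/stream",
    json=payload,
    headers={"Authorization": "Bearer <token>"},  # hypothetical auth scheme
    stream=True,
    timeout=300,
) as response:
    response.raise_for_status()
    # Print tokens as they stream in (a sketch: multi-byte UTF-8 characters
    # split across chunk boundaries are not handled here).
    for chunk in response.iter_content(chunk_size=None):
        print(chunk.decode("utf-8"), end="", flush=True)
```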
index.html DELETED
@@ -1,19 +0,0 @@
- <!doctype html>
- <html>
- <head>
-     <meta charset="utf-8" />
-     <meta name="viewport" content="width=device-width" />
-     <title>My static Space</title>
-     <link rel="stylesheet" href="style.css" />
- </head>
- <body>
-     <div class="card">
-         <h1>Welcome to your static Space!</h1>
-         <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-         <p>
-             Also don't forget to check the
-             <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-         </p>
-     </div>
- </body>
- </html>
style.css DELETED
@@ -1,28 +0,0 @@
- body {
-     padding: 2rem;
-     font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
- }
-
- h1 {
-     font-size: 16px;
-     margin-top: 0;
- }
-
- p {
-     color: rgb(107, 114, 128);
-     font-size: 15px;
-     margin-bottom: 10px;
-     margin-top: 5px;
- }
-
- .card {
-     max-width: 620px;
-     margin: 0 auto;
-     padding: 16px;
-     border: 1px solid lightgray;
-     border-radius: 16px;
- }
-
- .card p:last-child {
-     margin-bottom: 0;
- }
utils.py ADDED
@@ -0,0 +1,36 @@
+ from pydantic import BaseModel
+ from transformers import PreTrainedTokenizerFast, StoppingCriteria
+
+
+ def fallback(value, fallback_value):
+     if value is None:
+         return fallback_value
+     return value
+
+
+ class Body(BaseModel):
+     prompt: str
+     posts_count: int
+     max_length: int | None = None
+     temperature: float | None = None
+     top_p: float | None = None
+     top_k: int | None = None  # int, not float: transformers expects an integer
+     repetition_penalty: float | None = None
+     no_repeat_ngram_size: int | None = None  # int, not float, for the same reason
+     do_sample: bool | None = None
+
+
+ class MaxPostsStoppingCriteria(StoppingCriteria):
+     def __init__(self, tokenizer: PreTrainedTokenizerFast, posts_count: int):
+         self.end_of_post_token_id = tokenizer.encode("<|end_of_post|>", add_special_tokens=False)
+         self.posts_count = posts_count
+         self.counter = 0
+
+     def __call__(self, input_ids, scores, **kwargs):
+         # Check if the sequence now ends with the <|end_of_post|> token IDs
+         for sequence in input_ids:
+             if sequence[-len(self.end_of_post_token_id):].tolist() == self.end_of_post_token_id:
+                 self.counter += 1
+                 if self.counter >= self.posts_count:
+                     return True
+         return False
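
To see `MaxPostsStoppingCriteria` in action outside the server, here is a local sketch, assuming the model's tokenizer encodes `<|end_of_post|>` as a stable token sequence (which the criteria requires):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteriaList

from utils import MaxPostsStoppingCriteria

model_name = "Error410/JVCGPT-Medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("Some seed text", return_tensors="pt")
output = model.generate(
    **inputs,
    max_length=512,
    do_sample=True,
    # Stop as soon as two <|end_of_post|> markers have been generated.
    stopping_criteria=StoppingCriteriaList([MaxPostsStoppingCriteria(tokenizer, posts_count=2)]),
)
print(tokenizer.decode(output[0], skip_special_tokens=False))
```

Note that the criteria keeps state in `self.counter`, so a fresh instance is needed for each generation call, which is exactly what `stream()` in app.py does.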