Spaces: Running on Zero
Haoyu Lu committed · Commit 9ab6f3e · Parent(s): 1d2ac98

Feats: Update inference
Files changed:
- kimi_vl/serve/inference.py (+2 -2)
- requirements.txt (+1 -0)
kimi_vl/serve/inference.py CHANGED

@@ -4,7 +4,7 @@ from threading import Thread
 from typing import List, Optional
 
 import torch
-import
+import spaces
 from transformers import (
     AutoModelForCausalLM,
     AutoProcessor,
@@ -154,7 +154,7 @@ def preprocess(
 
 
 @torch.inference_mode()
-@
+@spaces.GPU
 def kimi_vl_generate(
     model: torch.nn.Module,
     processor: AutoProcessor,
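For context: spaces is Hugging Face's ZeroGPU helper package, and the spaces.GPU decorator marks the functions that need a GPU so ZeroGPU can attach one just for the duration of the call. Below is a minimal sketch of the pattern this commit introduces; the function name, its prompt parameter, and the body are hypothetical simplifications, not the real kimi_vl_generate, which takes more arguments than shown.

# Minimal sketch of the ZeroGPU pattern introduced above. The signature
# and body are hypothetical simplifications of kimi_vl_generate.
import torch
import spaces  # Hugging Face ZeroGPU helper; import it before CUDA is touched
from transformers import AutoProcessor


@torch.inference_mode()  # generation needs no autograd state
@spaces.GPU  # ZeroGPU attaches a GPU only while this function executes
def generate_text(model: torch.nn.Module, processor: AutoProcessor, prompt: str) -> str:
    # Prepare inputs on CPU, then move the batch to wherever the model lives.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=64)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0]

spaces.GPU also accepts a duration argument, e.g. @spaces.GPU(duration=120), to request a longer allocation, and on hardware other than ZeroGPU the decorator is effectively a no-op, so the same code runs unchanged on a regular GPU Space.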
requirements.txt CHANGED

@@ -6,6 +6,7 @@ attrdict
 einops
 tiktoken
 blobfile
+spaces
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
 
 # for gradio demo
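The requirements change simply makes the new import resolvable at build time. As an aside (not part of this commit), a guarded import is a common way to keep the same decorated code runnable where the spaces package is not installed, such as a local GPU machine; the fallback below is a hypothetical sketch.

# Hypothetical fallback: lets @spaces.GPU code run without the spaces
# package installed. Not from the commit; illustration only.
try:
    import spaces
except ImportError:
    from types import SimpleNamespace

    def _noop_gpu(func=None, **_kwargs):
        # Bare @spaces.GPU receives the function directly; the called form
        # @spaces.GPU(duration=...) receives only kwargs and must return
        # a decorator, so hand back an identity decorator in that case.
        return func if func is not None else (lambda f: f)

    spaces = SimpleNamespace(GPU=_noop_gpu)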