Haoyu Lu committed on
Commit
9ab6f3e
·
1 Parent(s): 1d2ac98

Feats: Update inference

Browse files
Files changed (2) hide show
  1. kimi_vl/serve/inference.py +2 -2
  2. requirements.txt +1 -0
kimi_vl/serve/inference.py CHANGED
@@ -4,7 +4,7 @@ from threading import Thread
4
  from typing import List, Optional
5
 
6
  import torch
7
- import space
8
  from transformers import (
9
  AutoModelForCausalLM,
10
  AutoProcessor,
@@ -154,7 +154,7 @@ def preprocess(
154
 
155
 
156
  @torch.inference_mode()
157
- @space.GPU
158
  def kimi_vl_generate(
159
  model: torch.nn.Module,
160
  processor: AutoProcessor,
 
4
  from typing import List, Optional
5
 
6
  import torch
7
+ import spaces
8
  from transformers import (
9
  AutoModelForCausalLM,
10
  AutoProcessor,
 
154
 
155
 
156
  @torch.inference_mode()
157
+ @spaces.GPU
158
  def kimi_vl_generate(
159
  model: torch.nn.Module,
160
  processor: AutoProcessor,
requirements.txt CHANGED
@@ -6,6 +6,7 @@ attrdict
6
  einops
7
  tiktoken
8
  blobfile
 
9
  https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
10
 
11
  # for gradio demo
 
6
  einops
7
  tiktoken
8
  blobfile
9
+ spaces
10
  https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.5cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
11
 
12
  # for gradio demo