Hjgugugjhuhjggg committed
Commit 1d24d56 · verified · 1 Parent(s): 78b48a5

Upload 27 files

Files changed (3):
  1. Dockerfile +50 -19
  2. model_loader.py +71 -17
  3. requirements.txt +40 -41
Dockerfile CHANGED
@@ -1,20 +1,51 @@
-FROM python:3.11-slim-buster
-
-ENV DEBIAN_FRONTEND=noninteractive
-ENV NUMBA_DISABLE_CACHE=1
-WORKDIR /app
-
-RUN apt-get update && apt-get upgrade -y
-RUN apt-get install libgl1-mesa-glx ffmpeg -y
-
-RUN mkdir -p /.cache/huggingface/hub && chmod -R 777 /.cache/huggingface/hub
-RUN mkdir -p /.config/matplotlib && chmod -R 777 /.config/matplotlib
-RUN mkdir -p /nltk_data && chmod -R 777 /nltk_data
-
-RUN pip install --no-cache-dir faker accelerate retry asyncio basicsr beautifulsoup4 bs4 opencv-python deep-translator duckduckgo-search fastapi flask flask-cors facexlib ffmpeg-python gfpgan imageio imageio-ffmpeg langdetect librosa nltk numpy Pillow pydub pytorch-lightning PyYAML retry safetensors scikit-learn scipy scikit-image soundfile torch torchaudio torchvision tqdm wget yacs numba
-
-COPY . .
-
-EXPOSE 7860
-
+FROM python:3.11-slim-buster
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NUMBA_DISABLE_CACHE=1
+WORKDIR /app
+
+RUN apt-get update && apt-get upgrade -y
+RUN apt-get install libgl1-mesa-glx ffmpeg -y
+
+RUN mkdir -p /.cache/huggingface/hub && chmod -R 777 /.cache/huggingface/hub
+RUN mkdir -p /.config/matplotlib && chmod -R 777 /.config/matplotlib
+RUN mkdir -p /GPT2 && chmod -R 777 /GPT2
+RUN mkdir -p /nltk_data && chmod -R 777 /nltk_data
+RUN mkdir -p /TranslationModel \
+    /CodeGenModel \
+    /ImageGenModel \
+    /MusicGenModel \
+    /SentimentModel \
+    /SummarizationModel \
+    /STTModel \
+    /TTSModel \
+    /ImageTo3DModel \
+    /TextToVideoModel \
+    /GFPGAN \
+    /RestoreFormer \
+    /CodeFormer \
+    /RealESRGAN \
+    /checkpoints && \
+    chmod -R 777 /TranslationModel \
+    /CodeGenModel \
+    /ImageGenModel \
+    /MusicGenModel \
+    /SentimentModel \
+    /SummarizationModel \
+    /STTModel \
+    /TTSModel \
+    /ImageTo3DModel \
+    /TextToVideoModel \
+    /GFPGAN \
+    /RestoreFormer \
+    /CodeFormer \
+    /RealESRGAN \
+    /checkpoints
+
+RUN pip install --no-cache-dir accelerate retry asyncio basicsr beautifulsoup4 bs4 opencv-python deep-translator duckduckgo-search fastapi flask flask-cors facexlib ffmpeg-python gfpgan imageio imageio-ffmpeg langdetect librosa nltk numpy Pillow pydub pytorch-lightning PyYAML retry safetensors scikit-learn scipy scikit-image soundfile torch torchaudio torchvision tqdm wget yacs numba
+
+COPY . .
+
+EXPOSE 7860
+
 CMD ["python", "main.py"]
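
Note: the new Dockerfile pre-creates one world-writable directory per model family so that a non-root runtime user can download weights into them. As a rough startup check, not part of this commit (the function name and the __main__ hook are assumptions), something like the following could confirm those directories exist and are writable:

# Hypothetical startup check (not in this commit): verify the model directories
# created in the Dockerfile are present and writable before any downloads begin.
import os

MODEL_DIRS = [
    "/GPT2", "/TranslationModel", "/CodeGenModel", "/ImageGenModel",
    "/MusicGenModel", "/SentimentModel", "/SummarizationModel", "/STTModel",
    "/TTSModel", "/ImageTo3DModel", "/TextToVideoModel", "/GFPGAN",
    "/RestoreFormer", "/CodeFormer", "/RealESRGAN", "/checkpoints",
]

def missing_model_dirs(dirs=MODEL_DIRS):
    """Return the directories that are absent or not writable by the current user."""
    return [d for d in dirs if not (os.path.isdir(d) and os.access(d, os.W_OK))]

if __name__ == "__main__":
    problems = missing_model_dirs()
    if problems:
        raise SystemExit(f"Missing or read-only model directories: {problems}")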
model_loader.py CHANGED
@@ -1,3 +1,6 @@
+from tokenxxx import *
+from constants import *
+from utils import *
 import os
 import json
 import urllib.request
@@ -28,17 +31,20 @@ def download_file(url, filepath):
     d = os.path.dirname(filepath)
     if d and not os.path.exists(d):
         os.makedirs(d, exist_ok=True)
-    if not os.path.exists(filepath):
-        def prog(t):
-            last = [0]
-            def inner(n, bs, ts):
-                if ts > 0:
-                    t.total = ts
-                t.update(n * bs - last[0])
-                last[0] = n * bs
-            return inner
-        with tqdm(unit='B', unit_scale=True, unit_divisor=1024, desc=os.path.basename(filepath)) as t:
-            urllib.request.urlretrieve(url, filepath, reporthook=prog(t))
+    while not os.path.exists(filepath):
+        try:
+            def prog(t):
+                last = [0]
+                def inner(n, bs, ts):
+                    if ts > 0:
+                        t.total = ts
+                    t.update(n * bs - last[0])
+                    last[0] = n * bs
+                return inner
+            with tqdm(unit='B', unit_scale=True, unit_divisor=1024, desc=os.path.basename(filepath)) as t:
+                urllib.request.urlretrieve(url, filepath, reporthook=prog(t))
+        except Exception:
+            continue
 
 def download_files(folder, files_spec):
     if isinstance(files_spec, dict):
@@ -77,16 +83,56 @@ def get_codegen_tokenizer(vocab_path, merges_path):
         vocab = json.load(f)
     with open(merges_path, 'r', encoding='utf-8') as f:
         merges = f.read().splitlines()
+    merge_ranks = {}
+    for i, merge in enumerate(merges):
+        parts = merge.strip().split()
+        if len(parts) == 2:
+            merge_ranks[tuple(parts)] = i
+    def bpe(token):
+        word = list(token)
+        pairs = [(word[i], word[i+1]) for i in range(len(word)-1)]
+        while True:
+            candidate = None
+            candidate_rank = None
+            candidate_index = None
+            for i, pair in enumerate(pairs):
+                if pair in merge_ranks:
+                    rank = merge_ranks[pair]
+                    if candidate is None or rank < candidate_rank:
+                        candidate = pair
+                        candidate_rank = rank
+                        candidate_index = i
+            if candidate is None:
+                break
+            first, second = candidate
+            new_word = []
+            i = 0
+            while i < len(word):
+                if i < len(word) - 1 and word[i] == first and word[i+1] == second:
+                    new_word.append(first + second)
+                    i += 2
+                else:
+                    new_word.append(word[i])
+                    i += 1
+            word = new_word
+            if len(word) == 1:
+                break
+            pairs = [(word[i], word[i+1]) for i in range(len(word)-1)]
+        return word
     def tokenizer(text):
-        toks = text.split()
-        return [vocab.get(t, 0) for t in toks]
+        tokens = []
+        for token in text.split():
+            bpe_tokens = bpe(token)
+            for subtoken in bpe_tokens:
+                tokens.append(vocab.get(subtoken, 0))
+        return tokens
     return tokenizer
 
 def simple_tokenizer(text, vocab, max_length=77):
     toks = text.split()
     ids = [vocab.get(t, 1) for t in toks]
     if len(ids) < max_length:
-        ids = ids + [0]*(max_length - len(ids))
+        ids = ids + [0] * (max_length - len(ids))
     else:
         ids = ids[:max_length]
     return torch.tensor(ids, dtype=torch.long).unsqueeze(0).to(device)
@@ -504,9 +550,17 @@ class DiffusionScheduler:
     def __init__(self, steps):
         self.steps = steps
         self.betas = torch.linspace(0.1, 0.001, steps=steps).to(device)
+        self.alphas = 1 - self.betas
+        self.alpha_bars = torch.cumprod(self.alphas, dim=0)
     def step(self, noise, t, sample):
-        beta = self.betas[t]
-        return sample - beta * noise
+        alpha_bar = self.alpha_bars[t]
+        if t > 0:
+            alpha_bar_prev = self.alpha_bars[t-1]
+        else:
+            alpha_bar_prev = torch.tensor(1.0, device=sample.device)
+        x0 = (sample - torch.sqrt(1 - alpha_bar) * noise) / torch.sqrt(alpha_bar)
+        new_sample = torch.sqrt(alpha_bar_prev) * x0 + torch.sqrt(1 - alpha_bar_prev) * noise
+        return new_sample
 
 class VideoOutput:
     def __init__(self, frames):
@@ -671,4 +725,4 @@ def initialize_musicgen_model(folder, files):
     sd = torch.load(os.path.join(folder, "pytorch_model.bin"), map_location=device)
     load_state_dict_safe(model, sd)
     model.eval()
-    return model
+    return model
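
Note: the largest model_loader.py change replaces the whitespace-only CodeGen tokenizer with a byte-pair-encoding pass driven by the merge ranks in merges.txt: the adjacent pair with the lowest rank is merged first, repeatedly, until no known pair remains, and the resulting subtokens are looked up in vocab.json (unknown subtokens fall back to id 0). A condensed sketch of that greedy idea, merging one occurrence per pass and using an invented toy vocab and merge table rather than the committed function, is shown below. The DiffusionScheduler change in the same file follows the familiar x0-reconstruction update: x0 = (sample - sqrt(1 - alpha_bar_t) * noise) / sqrt(alpha_bar_t), then new_sample = sqrt(alpha_bar_prev) * x0 + sqrt(1 - alpha_bar_prev) * noise.

# Condensed sketch of the greedy lowest-rank BPE merge loop (illustrative only;
# the toy merge_ranks and vocab below are assumptions, not the real model files).
def bpe(token, merge_ranks):
    """Repeatedly merge the adjacent pair with the lowest merge rank."""
    word = list(token)
    while len(word) > 1:
        pairs = [(word[i], word[i + 1]) for i in range(len(word) - 1)]
        ranked = [(merge_ranks[p], i) for i, p in enumerate(pairs) if p in merge_ranks]
        if not ranked:
            break
        _, i = min(ranked)                      # position of the lowest-rank pair
        word = word[:i] + [word[i] + word[i + 1]] + word[i + 2:]
    return word

merge_ranks = {("l", "o"): 0, ("lo", "w"): 1}   # toy merges: lowest rank merges first
vocab = {"low": 7, "lo": 3, "w": 4}             # toy vocab; unknown subtokens map to 0

ids = [vocab.get(sub, 0) for sub in bpe("low", merge_ranks)]
print(ids)  # -> [7]: "l"+"o" merges first, then "lo"+"w"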
requirements.txt CHANGED
@@ -1,41 +1,40 @@
-accelerate
-retry
-asyncio
-basicsr
-beautifulsoup4
-bs4
-opencv-python
-deep-translator
-duckduckgo-search
-fastapi
-faker
-flask
-flask-cors
-facexlib
-ffmpeg-python
-gfpgan
-imageio
-imageio-ffmpeg
-langdetect
-librosa
-nltk
-numpy
-Pillow
-pydub
-pytorch-lightning
-PyYAML
-retry
-safetensors
-scikit-learn
-scipy
-scikit-image
-soundfile
-torch
-torchaudio
-torchvision
-tqdm
-wget
-yacs
-numba
-librosa
-faker
+accelerate
+retry
+asyncio
+basicsr
+beautifulsoup4
+bs4
+opencv-python
+deep-translator
+duckduckgo-search
+fastapi
+faker
+flask
+flask-cors
+facexlib
+ffmpeg-python
+gfpgan
+imageio
+imageio-ffmpeg
+langdetect
+librosa
+nltk
+numpy
+Pillow
+pydub
+pytorch-lightning
+PyYAML
+retry
+safetensors
+scikit-learn
+scipy
+scikit-image
+soundfile
+torch
+torchaudio
+torchvision
+tqdm
+wget
+yacs
+numba
+librosa
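
Note: the requirements.txt rewrite drops the trailing duplicate faker entry, although retry and librosa are still listed twice. A small helper, not part of the commit, could flag the remaining duplicates:

# Hypothetical helper (not in this commit): report packages listed more than once,
# ignoring blank lines and comments.
from collections import Counter

def duplicate_requirements(path="requirements.txt"):
    with open(path, "r", encoding="utf-8") as f:
        names = [line.strip() for line in f if line.strip() and not line.startswith("#")]
    return [name for name, count in Counter(names).items() if count > 1]

if __name__ == "__main__":
    print(duplicate_requirements())  # expected for this file: ['retry', 'librosa']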