Hhhh

Running

App Files Files Community

Hjgugugjhuhjggg committed on Mar 2

Commit

1d24d56

verified ·

1 Parent(s): 78b48a5

Upload 27 files

Browse files

Files changed (3) hide show

Dockerfile +50 -19
model_loader.py +71 -17
requirements.txt +40 -41

Dockerfile CHANGED Viewed

@@ -1,20 +1,51 @@
-FROM python:3.11-slim-buster
-ENV DEBIAN_FRONTEND=noninteractive
-ENV NUMBA_DISABLE_CACHE=1
-WORKDIR /app
-RUN apt-get update && apt-get upgrade -y
-RUN apt-get install libgl1-mesa-glx ffmpeg -y
-RUN mkdir -p /.cache/huggingface/hub && chmod -R 777 /.cache/huggingface/hub
-RUN mkdir -p /.config/matplotlib && chmod -R 777 /.config/matplotlib
-RUN mkdir -p /nltk_data && chmod -R 777 /nltk_data
-RUN pip install --no-cache-dir faker accelerate retry asyncio basicsr beautifulsoup4 bs4 opencv-python deep-translator duckduckgo-search fastapi flask flask-cors facexlib ffmpeg-python gfpgan imageio imageio-ffmpeg langdetect librosa nltk numpy Pillow pydub pytorch-lightning PyYAML retry safetensors scikit-learn scipy scikit-image soundfile torch torchaudio torchvision tqdm wget yacs numba
-COPY . .
-EXPOSE 7860
 CMD ["python", "main.py"]

+FROM python:3.11-slim-buster
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NUMBA_DISABLE_CACHE=1
+WORKDIR /app
+RUN apt-get update && apt-get upgrade -y
+RUN apt-get install libgl1-mesa-glx ffmpeg -y
+RUN mkdir -p /.cache/huggingface/hub && chmod -R 777 /.cache/huggingface/hub
+RUN mkdir -p /.config/matplotlib && chmod -R 777 /.config/matplotlib
+RUN mkdir -p /GPT2 && chmod -R 777 /GPT2
+RUN mkdir -p /nltk_data && chmod -R 777 /nltk_data
+RUN mkdir -p /TranslationModel \
+    /CodeGenModel \
+    /ImageGenModel \
+    /MusicGenModel \
+    /SentimentModel \
+    /SummarizationModel \
+    /STTModel \
+    /TTSModel \
+    /ImageTo3DModel \
+    /TextToVideoModel \
+    /GFPGAN \
+    /RestoreFormer \
+    /CodeFormer \
+    /RealESRGAN \
+    /checkpoints && \
+    chmod -R 777 /TranslationModel \
+    /CodeGenModel \
+    /ImageGenModel \
+    /MusicGenModel \
+    /SentimentModel \
+    /SummarizationModel \
+    /STTModel \
+    /TTSModel \
+    /ImageTo3DModel \
+    /TextToVideoModel \
+    /GFPGAN \
+    /RestoreFormer \
+    /CodeFormer \
+    /RealESRGAN \
+    /checkpoints
+RUN pip install --no-cache-dir accelerate retry asyncio basicsr beautifulsoup4 bs4 opencv-python deep-translator duckduckgo-search fastapi flask flask-cors facexlib ffmpeg-python gfpgan imageio imageio-ffmpeg langdetect librosa nltk numpy Pillow pydub pytorch-lightning PyYAML retry safetensors scikit-learn scipy scikit-image soundfile torch torchaudio torchvision tqdm wget yacs numba
+COPY . .
+EXPOSE 7860
 CMD ["python", "main.py"]

model_loader.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import os
 import json
 import urllib.request
@@ -28,17 +31,20 @@ def download_file(url, filepath):
     d = os.path.dirname(filepath)
     if d and not os.path.exists(d):
         os.makedirs(d, exist_ok=True)
-    if not os.path.exists(filepath):
-        def prog(t):
-            last = [0]
-            def inner(n, bs, ts):
-                if ts > 0:
-                    t.total = ts
-                t.update(n * bs - last[0])
-                last[0] = n * bs
-            return inner
-        with tqdm(unit='B', unit_scale=True, unit_divisor=1024, desc=os.path.basename(filepath)) as t:
-            urllib.request.urlretrieve(url, filepath, reporthook=prog(t))
 def download_files(folder, files_spec):
     if isinstance(files_spec, dict):
@@ -77,16 +83,56 @@ def get_codegen_tokenizer(vocab_path, merges_path):
         vocab = json.load(f)
     with open(merges_path, 'r', encoding='utf-8') as f:
         merges = f.read().splitlines()
     def tokenizer(text):
-        toks = text.split()
-        return [vocab.get(t, 0) for t in toks]
     return tokenizer
 def simple_tokenizer(text, vocab, max_length=77):
     toks = text.split()
     ids = [vocab.get(t, 1) for t in toks]
     if len(ids) < max_length:
-        ids = ids + [0]*(max_length - len(ids))
     else:
         ids = ids[:max_length]
     return torch.tensor(ids, dtype=torch.long).unsqueeze(0).to(device)
@@ -504,9 +550,17 @@ class DiffusionScheduler:
     def __init__(self, steps):
         self.steps = steps
         self.betas = torch.linspace(0.1, 0.001, steps=steps).to(device)
     def step(self, noise, t, sample):
-        beta = self.betas[t]
-        return sample - beta * noise
 class VideoOutput:
     def __init__(self, frames):
@@ -671,4 +725,4 @@ def initialize_musicgen_model(folder, files):
     sd = torch.load(os.path.join(folder, "pytorch_model.bin"), map_location=device)
     load_state_dict_safe(model, sd)
     model.eval()
-    return model

+from tokenxxx import *
+from constants import *
+from utils import *
 import os
 import json
 import urllib.request
     d = os.path.dirname(filepath)
     if d and not os.path.exists(d):
         os.makedirs(d, exist_ok=True)
+    while not os.path.exists(filepath):
+        try:
+            def prog(t):
+                last = [0]
+                def inner(n, bs, ts):
+                    if ts > 0:
+                        t.total = ts
+                    t.update(n * bs - last[0])
+                    last[0] = n * bs
+                return inner
+            with tqdm(unit='B', unit_scale=True, unit_divisor=1024, desc=os.path.basename(filepath)) as t:
+                urllib.request.urlretrieve(url, filepath, reporthook=prog(t))
+        except Exception:
+            continue
 def download_files(folder, files_spec):
     if isinstance(files_spec, dict):
         vocab = json.load(f)
     with open(merges_path, 'r', encoding='utf-8') as f:
         merges = f.read().splitlines()
+    merge_ranks = {}
+    for i, merge in enumerate(merges):
+        parts = merge.strip().split()
+        if len(parts) == 2:
+            merge_ranks[tuple(parts)] = i
+    def bpe(token):
+        word = list(token)
+        pairs = [(word[i], word[i+1]) for i in range(len(word)-1)]
+        while True:
+            candidate = None
+            candidate_rank = None
+            candidate_index = None
+            for i, pair in enumerate(pairs):
+                if pair in merge_ranks:
+                    rank = merge_ranks[pair]
+                    if candidate is None or rank < candidate_rank:
+                        candidate = pair
+                        candidate_rank = rank
+                        candidate_index = i
+            if candidate is None:
+                break
+            first, second = candidate
+            new_word = []
+            i = 0
+            while i < len(word):
+                if i < len(word) - 1 and word[i] == first and word[i+1] == second:
+                    new_word.append(first + second)
+                    i += 2
+                else:
+                    new_word.append(word[i])
+                    i += 1
+            word = new_word
+            if len(word) == 1:
+                break
+            pairs = [(word[i], word[i+1]) for i in range(len(word)-1)]
+        return word
     def tokenizer(text):
         """Encode ``text`` as a flat list of vocabulary ids.

         The text is split on whitespace, each piece is segmented with ``bpe``,
         and every resulting sub-token is looked up in ``vocab``; sub-tokens
         missing from the vocabulary map to id 0.
         """
         return [
             vocab.get(piece, 0)
             for chunk in text.split()
             for piece in bpe(chunk)
         ]
     return tokenizer
 def simple_tokenizer(text, vocab, max_length=77):
     """Turn ``text`` into a fixed-length batch of token ids.

     Words are obtained by whitespace splitting and looked up in ``vocab``;
     words that are not in the vocabulary become id 1. The id sequence is cut
     to ``max_length`` entries, or right-padded with id 0 up to that length.

     Returns:
         A ``torch.long`` tensor with a leading batch dimension of size 1,
         moved to the module-level ``device``.
     """
     token_ids = [vocab.get(word, 1) for word in text.split()][:max_length]
     # A non-positive count yields an empty pad, so full-length input is untouched.
     token_ids += [0] * (max_length - len(token_ids))
     return torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(device)
     def __init__(self, steps):
         """Precompute the noise schedule for ``steps`` timesteps.

         ``betas`` is a linear ramp from 0.1 down to 0.001 placed on the
         module-level ``device``; ``alphas`` is ``1 - betas`` and
         ``alpha_bars`` is the running product of ``alphas``.
         """
         self.steps = steps
         betas = torch.linspace(0.1, 0.001, steps=steps).to(device)
         alphas = 1 - betas
         self.betas = betas
         self.alphas = alphas
         self.alpha_bars = torch.cumprod(alphas, dim=0)
     def step(self, noise, t, sample):
         """Move ``sample`` from timestep ``t`` to the previous timestep.

         A clean-sample estimate is recovered from ``sample`` and ``noise``
         using ``alpha_bars[t]``, then recombined with the same ``noise`` using
         the cumulative alpha of timestep ``t - 1``. At ``t == 0`` that
         previous value is taken as 1.0, so the clean estimate itself is
         returned.

         Args:
             noise: Noise tensor paired with ``sample`` (presumably the model's
                 prediction -- confirm against the caller).
             t: Index into ``alpha_bars``.
             sample: Current noisy sample.

         Returns:
             The sample for the previous timestep.
         """
         abar_t = self.alpha_bars[t]
         abar_prev = (
             self.alpha_bars[t - 1]
             if t > 0
             else torch.tensor(1.0, device=sample.device)
         )
         clean_estimate = (sample - torch.sqrt(1 - abar_t) * noise) / torch.sqrt(abar_t)
         return torch.sqrt(abar_prev) * clean_estimate + torch.sqrt(1 - abar_prev) * noise
 class VideoOutput:
     def __init__(self, frames):
     sd = torch.load(os.path.join(folder, "pytorch_model.bin"), map_location=device)
     load_state_dict_safe(model, sd)
     model.eval()
+    return model

requirements.txt CHANGED Viewed

@@ -1,41 +1,40 @@
-accelerate
-retry
-asyncio
-basicsr
-beautifulsoup4
-bs4
-opencv-python
-deep-translator
-duckduckgo-search
-fastapi
-faker
-flask
-flask-cors
-facexlib
-ffmpeg-python
-gfpgan
-imageio
-imageio-ffmpeg
-langdetect
-librosa
-nltk
-numpy
-Pillow
-pydub
-pytorch-lightning
-PyYAML
-retry
-safetensors
-scikit-learn
-scipy
-scikit-image
-soundfile
-torch
-torchaudio
-torchvision
-tqdm
-wget
-yacs
-numba
-librosa
-faker

+accelerate
+retry
+asyncio
+basicsr
+beautifulsoup4
+bs4
+opencv-python
+deep-translator
+duckduckgo-search
+fastapi
+faker
+flask
+flask-cors
+facexlib
+ffmpeg-python
+gfpgan
+imageio
+imageio-ffmpeg
+langdetect
+librosa
+nltk
+numpy
+Pillow
+pydub
+pytorch-lightning
+PyYAML
+retry
+safetensors
+scikit-learn
+scipy
+scikit-image
+soundfile
+torch
+torchaudio
+torchvision
+tqdm
+wget
+yacs
+numba
+librosa