Hjgugugjhuhjggg committed · Commit 0ff6756 · verified · 1 Parent(s): 1c817fd
api.py CHANGED
@@ -252,7 +252,7 @@ def tokenize(text):
 def perform_reasoning_stream(text, temperature, top_k, top_p, repetition_penalty):
     input_tensor = tokenize(text)
     hidden = None
-    for _ in range(20):
+    while True:
         outputs, hidden = text_model(input_tensor, hidden)
         logits = outputs[:, -1, :] / temperature
         probs = F.softmax(logits, dim=-1)
@@ -261,7 +261,10 @@ def perform_reasoning_stream(text, temperature, top_k, top_p, repetition_penalty
         token_str = vocab[chosen_index]
         yield token_str
         input_tensor = torch.cat([input_tensor, torch.tensor([[chosen_index]], dtype=torch.long)], dim=1)
-    yield "<END_STREAM>"
+        if token_str == "mundo":
+            yield "<END_STREAM>"
+            break
+
 
 class SentimentModel(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
@@ -393,7 +396,7 @@ def musicgen_api():
     audio = 0.5 * torch.sin(2 * torch.pi * frequency * t)
     audio = audio.unsqueeze(0)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-        torchaudio.save(tmp.name, audio, sr)
+        torchaudio.save(tmp.name, tmp.name, sr)
         tmp_path = tmp.name
     return send_file(tmp_path, mimetype="audio/wav", as_attachment=True, download_name="music.wav")
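Note: two of the hunks above change runtime behavior in ways worth flagging. The new `while True` loop only ends when the model happens to emit the literal token "mundo", so the stream has no guaranteed termination, and the new torchaudio.save call passes the temp-file path where the waveform tensor is expected. A bounded sketch, assuming the names from api.py (text_model, vocab, F, audio, sr), multinomial sampling where the diff elides the choice of chosen_index, and a hypothetical MAX_NEW_TOKENS cap:

    MAX_NEW_TOKENS = 256  # hypothetical cap; not defined in api.py
    for _ in range(MAX_NEW_TOKENS):
        outputs, hidden = text_model(input_tensor, hidden)
        logits = outputs[:, -1, :] / temperature
        probs = F.softmax(logits, dim=-1)
        chosen_index = torch.multinomial(probs, num_samples=1).item()  # stand-in for the elided sampling
        token_str = vocab[chosen_index]
        yield token_str
        input_tensor = torch.cat([input_tensor, torch.tensor([[chosen_index]], dtype=torch.long)], dim=1)
        if token_str == "mundo":  # the commit's sentinel, now with an upper bound
            break
    yield "<END_STREAM>"

    # torchaudio.save expects (filepath, Tensor[channels, frames], sample_rate),
    # so the waveform built above, not the path, belongs in the second argument:
    torchaudio.save(tmp.name, audio, sr)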
background_tasks.py CHANGED
@@ -48,7 +48,7 @@ def text_to_vector(text):
     tokens = tokenize_text(text)
     update_vocabulary(tokens)
     indices = [word_to_index.get(token, 0) for token in tokens]
-    return torch.tensor(indices, dtype=torch.long)
+    return torch.tensor(indices, dtype=torch.long).unsqueeze(0)
 
 def generate_and_queue_text(language):
     global categories, text_queue
@@ -125,12 +125,12 @@ class ReasoningModel(nn.Module):
         output, hidden = self.rnn(emb, hidden)
         logits = self.fc(output)
         return logits, hidden
-    def generate(self, input_seq, max_length=50, temperature=1.0):
+    def generate(self, input_seq, max_length=999999999, temperature=1.0):
         self.eval()
         tokens = input_seq.copy()
         hidden = None
         generated = []
-        for _ in range(max_length):
+        while True:
             input_tensor = torch.tensor([tokens], dtype=torch.long)
             logits, hidden = self.forward(input_tensor, hidden)
             next_token_logits = logits[0, -1, :] / temperature
@@ -140,6 +140,8 @@ class ReasoningModel(nn.Module):
             generated.append(next_token)
             if next_token == word_to_index.get("<EOS>"):
                 break
+            if len(generated) > max_length:
+                break
         return generated
 
 reasoning_model = ReasoningModel(len(vocabulary))
@@ -148,7 +150,7 @@ def perform_reasoning_stream(text_input, temperature=0.7, top_k=40, top_p=0.0, r
     tokens = tokenize_text(text_input)
     update_vocabulary(tokens)
     tokens_indices = [word_to_index.get(token, 0) for token in tokens]
-    generated_indices = reasoning_model.generate(tokens_indices, max_length=50, temperature=temperature)
+    generated_indices = reasoning_model.generate(tokens_indices, max_length=999999999, temperature=temperature)
     for idx in generated_indices:
         yield vocabulary[idx] + " "
     yield "<END_STREAM>"
@@ -179,7 +181,7 @@ def background_reasoning_queue():
             full_response += chunk
         cleaned_response = re.sub(r'\s+(?=[.,,。])', '', full_response.replace("<|endoftext|>", "")).strip()
         if cleaned_response in seen_responses:
-            final_response = "**Response is repetitive. Please try again or rephrase your query.**"
+            final_response = "**Response is repetitive. Please try again or rephrase your query.**";
             resp_queue.put({"text": final_response})
         else:
            seen_responses.add(cleaned_response)
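Note: generate() now loops unconditionally while its callers pass max_length=999999999, so generation is effectively bounded only by <EOS>; the new guard also appends before checking len(generated) > max_length, so it can emit max_length + 1 tokens. A bounded variant of the loop, as a sketch reusing the class's own names (the sampling step the diff elides is stood in by argmax):

    while len(generated) < max_length:  # test before emitting, not after
        input_tensor = torch.tensor([tokens], dtype=torch.long)
        logits, hidden = self.forward(input_tensor, hidden)
        next_token_logits = logits[0, -1, :] / temperature
        next_token = torch.argmax(next_token_logits).item()  # stand-in for the elided sampling
        tokens.append(next_token)
        generated.append(next_token)
        if next_token == word_to_index.get("<EOS>"):
            break
    return generated

The trailing semicolon added to the final_response line is legal Python but unidiomatic.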
codegen_api.py CHANGED
@@ -1,12 +1,11 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import codegen_model, codegen_tokenizer, device
 
 def generate_code(prompt, output_path="output_code.py"):
     if codegen_model is None:
         return "Code generation model not initialized."
     input_ids = codegen_tokenizer.encode(prompt, return_tensors='pt').to(device)
-    output = codegen_model.generate(input_ids, max_length=512, temperature=0.7, top_p=0.9)
+    output = codegen_model.generate(input_ids, max_length=999999999, temperature=0.7, top_p=0.9)
     code = codegen_tokenizer.decode(output[0], skip_special_tokens=True)
     with open(output_path, "w") as file:
         file.write(code)
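Note: max_length=999999999 leaves EOS as the only stopping criterion here as well. If codegen_model follows the Hugging Face generate() convention (an assumption; the model arrives via the star import from main), max_length also counts the prompt tokens, so a cap on the completion alone is normally written as:

    # Sketch, assuming an HF-style generate() signature:
    output = codegen_model.generate(input_ids, max_new_tokens=512, temperature=0.7, top_p=0.9)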
configs.py CHANGED
@@ -203,4 +203,4 @@ class AutoencoderKLConfig:
 
     @classmethod
     def from_dict(cls, config_dict):
-        return cls(**config_dict)
+        return cls(**config_dict)
constants.py CHANGED
@@ -195,7 +195,7 @@ html_code = """<!DOCTYPE html>
        const bytesLength = byteCharacters.length;
        const slicesCount = Math.ceil(bytesLength / sliceSize);
        const byteArrays = new Array(slicesCount);
-        for (let sliceIndex = 0; sliceIndex < slicesCount; ++sliceIndex) {
+        for (let sliceIndex = sliceIndex < slicesCount; ++sliceIndex) {
            const begin = sliceIndex * sliceSize;
            const end = Math.min(begin + sliceSize, bytesLength);
            const bytes = new Array(end - begin);
@@ -213,11 +213,6 @@ html_code = """<!DOCTYPE html>
 
 HTML_CODE = html_code
 
-# =============================================================================
-# Constantes definidas por el usuario
-# =============================================================================
-
-# GPT-2
 GPT2_FOLDER = "./GPT2"
 MODEL_FILE = "gpt2-pytorch_model.bin"
 ENCODER_FILE = "encoder.json"
@@ -228,7 +223,6 @@ MODEL_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_mo
 ENCODER_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/encoder.json"
 VOCAB_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/vocab.bpe"
 
-# Traducción (MBart)
 TRANSLATION_FOLDER = "./TranslationModel"
 TRANSLATION_MODEL_WEIGHTS_FILE = "pytorch_model.bin"
 TRANSLATION_MODEL_CONFIG_FILE = "config.json"
@@ -242,7 +236,6 @@ TRANSLATION_MODEL_FILES_URLS = [
     (TRANSLATION_MODEL_VOCAB_URL, TRANSLATION_MODEL_VOCAB_FILE),
 ]
 
-# CodeGen
 CODEGEN_FOLDER = "./CodeGenModel"
 CODEGEN_MODEL_NAME = "codegen-350M-multi"
 CODEGEN_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -260,7 +253,6 @@ CODEGEN_FILES_URLS = [
     (CODEGEN_MERGES_URL, CODEGEN_MERGES),
 ]
 
-# MusicGen
 MUSICGEN_FOLDER = "./MusicGenModel"
 MUSICGEN_MODEL_NAME = "melody"
 MUSICGEN_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -274,7 +266,6 @@ MUSICGEN_FILES_URLS = [
     (MUSICGEN_CONFIG_URL, MUSICGEN_CONFIG)
 ]
 
-# Summarization (Bart)
 SUMMARIZATION_FOLDER = "./SummarizationModel"
 SUMMARIZATION_MODEL_WEIGHTS = "pytorch_model.bin"
 SUMMARIZATION_CONFIG = "config.json"
@@ -288,7 +279,6 @@ SUMMARIZATION_FILES_URLS = [
     (SUMMARIZATION_VOCAB_URL, SUMMARIZATION_VOCAB)
 ]
 
-# TTS
 TTS_FOLDER = "./TTSModel"
 TTS_MODEL_NAME = "vits"
 TTS_MODEL_CONFIG = "config.json"
@@ -303,7 +293,6 @@ TTS_FILES_URLS = [
     (TTS_VOCAB_URL, TTS_VOCAB)
 ]
 
-# STT
 STT_FOLDER = "./STTModel"
 STT_MODEL_NAME = "wav2vec2"
 STT_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -318,7 +307,6 @@ STT_FILES_URLS = [
     (STT_VOCAB_URL, STT_VOCAB)
 ]
 
-# Sentiment Analysis
 SENTIMENT_FOLDER = "./SentimentModel"
 SENTIMENT_MODEL_WEIGHTS = "pytorch_model.bin"
 SENTIMENT_VOCAB = "vocab.json"
@@ -332,7 +320,6 @@ SENTIMENT_FILES_URLS = [
     (SENTIMENT_CONFIG_URL, SENTIMENT_CONFIG_FILE)
 ]
 
-# Image Generation (VAE)
 IMAGEGEN_FOLDER = "./ImageGenModel"
 IMAGEGEN_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
 IMAGEGEN_CONFIG = "config.json"
@@ -343,7 +330,6 @@ IMAGEGEN_FILES_URLS = [
     (IMAGEGEN_CONFIG_URL, IMAGEGEN_CONFIG)
 ]
 
-# Image to 3D
 IMAGE_TO_3D_FOLDER = "./ImageTo3DModel"
 IMAGE_TO_3D_MODEL_WEIGHTS = "pytorch_model.bin"
 IMAGE_TO_3D_CONFIG = "config.json"
@@ -354,11 +340,10 @@ IMAGE_TO_3D_FILES_URLS = [
     (IMAGE_TO_3D_CONFIG_URL, IMAGE_TO_3D_CONFIG)
 ]
 
-# Text to Video
 TEXT_TO_VIDEO_FOLDER = "./TextToVideoModel"
-TEXT_TO_VIDEO_MODEL_WEIGHTS = "diffusion_pytorch_model.bin" # Usado para ambos (Unet y VAE)
-TEXT_TO_VIDEOX_MODEL_WEIGHTS = "diffusion_pytorch_model.fp16.bin" # Usado para ambos (Unet y VAE)
-TEXT_TO_VIDEO_CONFIG = "config.json" # Usado para ambos (Unet y VAE)
+TEXT_TO_VIDEO_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
+TEXT_TO_VIDEOX_MODEL_WEIGHTS = "diffusion_pytorch_model.fp16.bin"
+TEXT_TO_VIDEO_CONFIG = "config.json"
 TEXT_TO_VIDEO_VOCAB = "vocab.json"
 TEXT_TO_VIDEO_MODEL_WEIGHTS_URL_UNET = "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b/resolve/main/unet/diffusion_pytorch_model.fp16.bin"
 TEXT_TO_VIDEO_CONFIG_URL_UNET = "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b/resolve/main/unet/config.json"
@@ -375,32 +360,22 @@ TEXT_TO_VIDEO_FILES_URLS = [
     (TEXT_TO_VIDEO_VOCAB_URL, TEXT_TO_VIDEO_VOCAB),
 ]
 
-# SadTalker
-# ============================================================================
-# Modelos de Restauración para SadTalker (Face Restoration / Super-Resolution)
-# ============================================================================
-# GFPGAN
 GFPGAN_FOLDER = "./GFPGAN"
 GFPGAN_MODEL_FILE = "GFPGANv1.4.pth"
 GFPGAN_URL = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"
 
-# RestoreFormer
 RESTOREFORMER_FOLDER = "./RestoreFormer"
 RESTOREFORMER_MODEL_FILE = "RestoreFormer.pth"
 RESTOREFORMER_URL = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/RestoreFormer.pth"
 
-# CodeFormer
 CODEFORMER_FOLDER = "./CodeFormer"
 CODEFORMER_MODEL_FILE = "codeformer.pth"
 CODEFORMER_URL = "https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth"
 
-# RealESRGAN
 REALESRGAN_FOLDER = "./RealESRGAN"
 REALESRGAN_MODEL_FILE = "RealESRGAN_x2plus.pth"
 REALESRGAN_URL = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"
 
-
-
 kp = "https://huggingface.co/usyd-community/vitpose-base-simple/resolve/main/model.safetensors"
 kp_file = "kp_detector.safetensors"
 aud = "https://huggingface.co/vinthony/SadTalker/resolve/main/auido2pose_00140-model.pth"
@@ -414,36 +389,29 @@ mapx_file = "mapping.pth"
 den = "https://huggingface.co/KwaiVGI/LivePortrait/resolve/main/liveportrait/base_models/motion_extractor.pth"
 den_file = "dense_motion.pth"
 
-# --- Define constants for new SadTalker models ---
 SADTALKER_KP_FOLDER = "checkpoints"
 SADTALKER_KP_MODEL_FILE = kp_file
 SADTALKER_KP_URL = kp
 
-SADTALKER_AUD_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_AUD_FOLDER = "checkpoints"
 SADTALKER_AUD_MODEL_FILE = aud_file
 SADTALKER_AUD_URL = aud
 
-SADTALKER_WAV_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_WAV_FOLDER = "checkpoints"
 SADTALKER_WAV_MODEL_FILE = wav_file
 SADTALKER_WAV_URL = wav
 
-SADTALKER_GEN_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_GEN_FOLDER = "checkpoints"
 SADTALKER_GEN_MODEL_FILE = gen_file
 SADTALKER_GEN_URL = gen
 
-SADTALKER_MAPX_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_MAPX_FOLDER = "checkpoints"
 SADTALKER_MAPX_MODEL_FILE = mapx_file
 SADTALKER_MAPX_URL = mapx
 
-SADTALKER_DEN_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_DEN_FOLDER = "checkpoints"
 SADTALKER_DEN_MODEL_FILE = den_file
 SADTALKER_DEN_URL = den
 
-
-
-
-# =============================================================================
-# SadTalker
-# =============================================================================
 SADTALKER_CHECKPOINTS_FOLDER = "./checkpoints"
-SADTALKER_CONFIG_FOLDER = "./src/config"
+SADTALKER_CONFIG_FOLDER = "./src/config"
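Note: the edited loop header inside html_code now reads for (let sliceIndex = sliceIndex < slicesCount; ++sliceIndex) { — it has one semicolon where a for statement needs two, so the browser rejects it as a SyntaxError and the base64-to-Blob slicing stops working. The pre-commit form, for (let sliceIndex = 0; sliceIndex < slicesCount; ++sliceIndex) {, is presumably what was intended.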
extensions.py CHANGED
@@ -175,7 +175,7 @@ def save_video_with_watermark(video_frames, audio_path, output_path, watermark_p
             watermark_h, watermark_w = watermark.shape[:2]
             start_h = frame_h - watermark_h - 10
             start_w = frame_w - watermark_w - 10
-            frame[start_h:start_h+watermark_h, start_w:start_w+watermark_w, :] = watermark
+            frame[start_h:start_h+watermark_h, start_w:start_w+watermark_h, :] = watermark
             writer.append_data(img_as_ubyte(frame))
     except Exception as e:
         print(f"Error in video writing: {e}")
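Note: the new slice uses watermark_h for the horizontal extent, so a window start_w:start_w+watermark_h wide receives a watermark that is watermark_w wide; unless the watermark happens to be square, NumPy raises a shape-mismatch error (caught and printed by the except block above). The pre-commit indexing matches the shapes:

    frame[start_h:start_h+watermark_h, start_w:start_w+watermark_w, :] = watermark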
image_to_3d_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import image_to_3d_model, device
 from PIL import Image
 import torch
 import numpy as np
imagegen_api.py CHANGED
@@ -3,7 +3,6 @@ from flask import jsonify, send_file, request
 from io import BytesIO
 from PIL import Image
 from main import *
-#from main import import imagegen_model, device
 import torch
 
 def generate_image(prompt, output_path="output_image.png"):
main.py CHANGED
@@ -115,4 +115,4 @@ if __name__ == "__main__":
     background_threads.append(threading.Thread(target=background_reasoning_queue, daemon=True))
     for thread in background_threads:
         thread.start()
-    app.run(host='0.0.0.0', port=7860)
+    app.run(host='0.0.0.0', port=7860)
model_loader.py CHANGED
@@ -671,4 +671,4 @@ def initialize_musicgen_model(folder, files):
     sd = torch.load(os.path.join(folder, "pytorch_model.bin"), map_location=device)
     load_state_dict_safe(model, sd)
     model.eval()
-    return model
+    return model
models.py CHANGED
@@ -3,9 +3,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 import math
 import copy
-#from configs import GPT2Config, MBartConfig, CodeGenConfig, SummarizationConfig, OpenLRMConfig, UNet2DConditionModelConfig, AutoencoderKLConfig, BartConfig, MusicGenConfig
 from configs import *
-#from extensions import gelu, LayerNorm, Conv1D, Attention, MLP, Block, GPT2Model, GPT2LMHead, MBartEncoderLayer, MBartDecoderLayer, MBartEncoder, MBartDecoder, MBartModel, MBartForConditionalGeneration, CodeGenAttention, CodeGenBlock, CodeGenModel, CodeGenForCausalLM, SummarizationModel, OpenLRM, OpenLRMLayer, OpenLRMAttention, OpenLRMFeedForward, AutoencoderKL, Encoder_, Decoder_, DownBlock, UpBlock, ResnetBlock, MidBlock, Downsample2D, Upsample2D, UNet2DConditionModel, UNetMidBlock2DConditionModel, UNetDownBlock2DConditionModel, UNetUpBlock2DConditionModel, ResnetBlock2D, CrossAttentionBlock2D, CrossAttention, SimpleClassifier
 from extensions import *
 
 class SentimentClassifierModel(nn.Module):
@@ -93,4 +91,4 @@ class MusicGenModel(nn.Module):
             audio_output.append(predicted_token.cpu())
             input_tokens = torch.cat((input_tokens, predicted_token), dim=1)
         audio_output = torch.cat(audio_output, dim=1).float()
-        return audio_output
+        return audio_output
musicgen_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import musicgen_model, device
 import torch
 import soundfile as sf
 import numpy as np
@@ -12,7 +11,7 @@ def generate_music(prompt, output_path="output_music.wav"):
 
     attributes = [prompt]
     sample_rate = 32000
-    duration = 8
+    duration = 60
     audio_values = musicgen_model.sample(
         attributes=attributes,
         sample_rate=sample_rate,
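Note: with the sample rate fixed at 32000 Hz, raising duration from 8 to 60 seconds grows the output buffer, and the autoregressive sampling work behind musicgen_model.sample, by 7.5x:

    sample_rate = 32000
    print(sample_rate * 8)   # 256,000 samples per channel at 8 s
    print(sample_rate * 60)  # 1,920,000 samples per channel at 60 s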
sadtalker_api.py CHANGED
@@ -7,11 +7,9 @@ import requests
 from urllib.parse import urlparse
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form, WebSocket
 from fastapi.responses import JSONResponse
-#from fastapi.middleware.cors import CORSMiddleware
 from fastapi import APIRouter
 from extensions import *
 from main import *
-#from main import import sadtalker_instance
 from tts_api import *
 from sadtalker_utils import *
 import base64
sadtalker_utils.py CHANGED
@@ -863,4 +863,4 @@ class FaceEnhancer(nn.Module):
             self.face_enhancer = None
 
     def forward(self, x):
-        return self.face_enhancer.enhance(x, outscale=1)[0]
+        return self.face_enhancer.enhance(x, outscale=1)[0]
sentiment_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify
 from main import *
-#from main import import sentiment_model, device
 import torch
 
 def analyze_sentiment(text, output_path="output_sentiment.json"):
stt_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import stt_model, device
 import torch
 import torchaudio
 
summarization_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import summarization_model, summarization_word_to_index, device
 import torch
 
 def summarize_text(text, output_path="output_summary.txt"):
@@ -11,7 +10,7 @@ def summarize_text(text, output_path="output_summary.txt"):
     input_tensor = torch.tensor([input_tokens], dtype=torch.long).to(device)
 
     with torch.no_grad():
-        summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=100, early_stopping=True)
+        summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=999999999, early_stopping=True)
     summary_text = summarization_model.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
     with open(output_path, "w") as file:
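Note: as in codegen_api.py, max_length=999999999 removes the practical cap on the summary; early_stopping=True only ends beam search once enough beams have finished with EOS, so a model that never emits EOS can run away. The pre-commit call bounded the summary at 100 tokens:

    summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=100, early_stopping=True)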
text_generation.py CHANGED
@@ -4,7 +4,6 @@ from tqdm import trange
 import time
 from tokenxxx import *
 from main import *
-#from main import import model_gpt2, enc, codegen_model, codegen_tokenizer, summarization_model, device, system_prompt, MAX_LENGTH, summarize_text as summarize_func
 from duckduckgo_search import DDGS
 
 def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
@@ -48,10 +47,7 @@ def sample_sequence(prompt, model, enc, length, temperature=1, top_k=0, top_p=0.
         yield enc.decode([token])
         if token == enc.encoder[END_OF_TEXT_TOKEN]:
             yield "<END_STREAM>"
-        if text_generated_count > length:
-            yield "<END_STREAM>"
-        if (time.time() - start_time) * 1000 > 5000:
-            yield "<END_STREAM>"
+
 
 def sample_sequence_codegen(prompt, model, tokenizer, length, temperature=1, top_k=0, top_p=0.0, repetition_penalty=1.0, device="cpu"):
     start_time = time.time()
@@ -77,10 +73,7 @@ def sample_sequence_codegen(prompt, model, tokenizer, length, temperature=1, top
         yield tokenizer.decode([token])
         if token == 50256:
             yield "<END_STREAM>"
-        if text_generated_count > length:
-            yield "<END_STREAM>"
-        if (time.time() - start_time) * 1000 > 5000:
-            yield "<END_STREAM>"
+
 
 def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_penalty):
     try:
@@ -105,7 +98,7 @@ def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_p
         prompt=reasoning_prompt,
         model=codegen_model,
         tokenizer=codegen_tokenizer,
-        length=MAX_LENGTH,
+        length=999999999,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
@@ -125,7 +118,7 @@ def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_p
         prompt=reasoning_prompt,
         model=model_gpt2,
         enc=enc,
-        length=MAX_LENGTH,
+        length=999999999,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
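Note: these hunks delete both safety valves from the streaming samplers (the token-count check against length and the 5-second wall-clock check) and then pass length=999999999, leaving the end-of-text token (enc.encoder[END_OF_TEXT_TOKEN], or id 50256 for CodeGen) as the only terminator. If a cap is still wanted, a sketch of the guard inside either loop (names as in this file):

    yield tokenizer.decode([token])
    if token == 50256 or text_generated_count > length:
        yield "<END_STREAM>"
        break  # assumed; the diff does not show how the loop exits after the sentinel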
text_to_video_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import text_to_video_model
 import torch
 import io
 from skimage import img_as_ubyte
tokenxxx.py CHANGED
@@ -140,22 +140,3 @@ def codegen_tokenize(text, tokenizer):
 
 def codegen_decode(tokens, tokenizer):
     return tokenizer.decode(tokens)
-
-def tokenize_text(text):
-    global vocabulary, word_to_index, index_to_word
-    tokens = text.lower().split()
-    for token in tokens:
-        if token not in vocabulary:
-            vocabulary.add(token)
-            word_to_index[token] = len(index_to_word)
-            index_to_word.append(token)
-    return tokens
-
-def text_to_vector(text):
-    global vocabulary, word_to_index
-    tokens = tokenize_text(text)
-    vector = torch.zeros(len(vocabulary))
-    for token in tokens:
-        if token in word_to_index:
-            vector[word_to_index[token]] += 1
-    return vector
translation_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import translation_model, device
 
 def perform_translation(text, target_language_code='es_XX', source_language_code='en_XX', output_path="output_translation.txt"):
     if translation_model is None:
tts_api.py CHANGED
@@ -1,7 +1,6 @@
 import os
 from flask import jsonify, send_file, request
 from main import *
-#from main import import tts_model, device
 
 def text_to_speech_func(text, output_path="output_tts.wav"):
     if tts_model is None:
utils.py CHANGED
@@ -187,4 +187,4 @@ def get_codegen_tokenizer_pure(vocab_file, merges_file):
         byte_decoder=byte_decoder,
         tokenize=tokenize
     )
-    return encoder_obj
+    return encoder_obj