Hjgugugjhuhjggg committed · Commit 0ff6756 · verified · 1 Parent(s): 1c817fd
api.py CHANGED
@@ -252,7 +252,7 @@ def tokenize(text):
 def perform_reasoning_stream(text, temperature, top_k, top_p, repetition_penalty):
     input_tensor = tokenize(text)
     hidden = None
-    for _ in range(20):
+    while True:
         outputs, hidden = text_model(input_tensor, hidden)
         logits = outputs[:, -1, :] / temperature
         probs = F.softmax(logits, dim=-1)
@@ -261,7 +261,10 @@ def perform_reasoning_stream(text, temperature, top_k, top_p, repetition_penalty
         token_str = vocab[chosen_index]
         yield token_str
         input_tensor = torch.cat([input_tensor, torch.tensor([[chosen_index]], dtype=torch.long)], dim=1)
-    yield "<END_STREAM>"
+        if token_str == "mundo":
+            yield "<END_STREAM>"
+            break
+
 
 class SentimentModel(nn.Module):
     def __init__(self, input_dim, hidden_dim, output_dim):
@@ -393,7 +396,7 @@ def musicgen_api():
     audio = 0.5 * torch.sin(2 * torch.pi * frequency * t)
     audio = audio.unsqueeze(0)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-        torchaudio.save(tmp.name, audio, sr)
+        torchaudio.save(tmp.name, tmp.name, sr)
         tmp_path = tmp.name
     return send_file(tmp_path, mimetype="audio/wav", as_attachment=True, download_name="music.wav")
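Note: two of the hunks above change runtime behavior in ways worth flagging. The new `while True` loop only ends when the model happens to emit the literal token "mundo", so the stream has no guaranteed termination, and the new torchaudio.save call passes the temp-file path where the waveform tensor is expected. A bounded sketch, assuming the names from api.py (text_model, vocab, F, audio, sr), multinomial sampling where the diff elides the choice of chosen_index, and a hypothetical MAX_NEW_TOKENS cap:

    MAX_NEW_TOKENS = 256  # hypothetical cap; not defined in api.py
    for _ in range(MAX_NEW_TOKENS):
        outputs, hidden = text_model(input_tensor, hidden)
        logits = outputs[:, -1, :] / temperature
        probs = F.softmax(logits, dim=-1)
        chosen_index = torch.multinomial(probs, num_samples=1).item()  # stand-in for the elided sampling
        token_str = vocab[chosen_index]
        yield token_str
        input_tensor = torch.cat([input_tensor, torch.tensor([[chosen_index]], dtype=torch.long)], dim=1)
        if token_str == "mundo":  # the commit's sentinel, now with an upper bound
            break
    yield "<END_STREAM>"

    # torchaudio.save expects (filepath, Tensor[channels, frames], sample_rate),
    # so the waveform built above, not the path, belongs in the second argument:
    torchaudio.save(tmp.name, audio, sr)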
background_tasks.py CHANGED
@@ -48,7 +48,7 @@ def text_to_vector(text):
     tokens = tokenize_text(text)
     update_vocabulary(tokens)
     indices = [word_to_index.get(token, 0) for token in tokens]
-    return torch.tensor(indices, dtype=torch.long)
+    return torch.tensor(indices, dtype=torch.long).unsqueeze(0)
 
 def generate_and_queue_text(language):
     global categories, text_queue
@@ -125,12 +125,12 @@ class ReasoningModel(nn.Module):
         output, hidden = self.rnn(emb, hidden)
         logits = self.fc(output)
         return logits, hidden
-    def generate(self, input_seq, max_length=50, temperature=1.0):
+    def generate(self, input_seq, max_length=999999999, temperature=1.0):
         self.eval()
         tokens = input_seq.copy()
         hidden = None
         generated = []
-        for _ in range(max_length):
+        while True:
             input_tensor = torch.tensor([tokens], dtype=torch.long)
             logits, hidden = self.forward(input_tensor, hidden)
             next_token_logits = logits[0, -1, :] / temperature
@@ -140,6 +140,8 @@ class ReasoningModel(nn.Module):
             generated.append(next_token)
             if next_token == word_to_index.get("<EOS>"):
                 break
+            if len(generated) > max_length:
+                break
         return generated
 
 reasoning_model = ReasoningModel(len(vocabulary))
@@ -148,7 +150,7 @@ def perform_reasoning_stream(text_input, temperature=0.7, top_k=40, top_p=0.0, r
     tokens = tokenize_text(text_input)
     update_vocabulary(tokens)
     tokens_indices = [word_to_index.get(token, 0) for token in tokens]
-    generated_indices = reasoning_model.generate(tokens_indices, max_length=50, temperature=temperature)
+    generated_indices = reasoning_model.generate(tokens_indices, max_length=999999999, temperature=temperature)
     for idx in generated_indices:
         yield vocabulary[idx] + " "
     yield "<END_STREAM>"
@@ -179,7 +181,7 @@ def background_reasoning_queue():
             full_response += chunk
         cleaned_response = re.sub(r'\s+(?=[.,,。])', '', full_response.replace("<|endoftext|>", "")).strip()
         if cleaned_response in seen_responses:
-            final_response = "**Response is repetitive. Please try again or rephrase your query.**"
+            final_response = "**Response is repetitive. Please try again or rephrase your query.**";
             resp_queue.put({"text": final_response})
         else:
            seen_responses.add(cleaned_response)
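Note: generate() now loops unconditionally while its callers pass max_length=999999999, so generation is effectively bounded only by <EOS>; the new guard also appends before checking len(generated) > max_length, so it can emit max_length + 1 tokens. A bounded variant of the loop, as a sketch reusing the class's own names (the sampling step the diff elides is stood in by argmax):

    while len(generated) < max_length:  # test before emitting, not after
        input_tensor = torch.tensor([tokens], dtype=torch.long)
        logits, hidden = self.forward(input_tensor, hidden)
        next_token_logits = logits[0, -1, :] / temperature
        next_token = torch.argmax(next_token_logits).item()  # stand-in for the elided sampling
        tokens.append(next_token)
        generated.append(next_token)
        if next_token == word_to_index.get("<EOS>"):
            break
    return generated

The trailing semicolon added to the final_response line is legal Python but unidiomatic.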
codegen_api.py CHANGED
@@ -1,12 +1,11 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import codegen_model, codegen_tokenizer, device
 
 def generate_code(prompt, output_path="output_code.py"):
     if codegen_model is None:
         return "Code generation model not initialized."
     input_ids = codegen_tokenizer.encode(prompt, return_tensors='pt').to(device)
-    output = codegen_model.generate(input_ids, max_length=512, temperature=0.7, top_p=0.9)
+    output = codegen_model.generate(input_ids, max_length=999999999, temperature=0.7, top_p=0.9)
     code = codegen_tokenizer.decode(output[0], skip_special_tokens=True)
     with open(output_path, "w") as file:
         file.write(code)
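Note: max_length=999999999 leaves EOS as the only stopping criterion here as well. If codegen_model follows the Hugging Face generate() convention (an assumption; the model arrives via the star import from main), max_length also counts the prompt tokens, so a cap on the completion alone is normally written as:

    # Sketch, assuming an HF-style generate() signature:
    output = codegen_model.generate(input_ids, max_new_tokens=512, temperature=0.7, top_p=0.9)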
configs.py CHANGED
@@ -203,4 +203,4 @@ class AutoencoderKLConfig:
 
     @classmethod
     def from_dict(cls, config_dict):
-        return cls(**config_dict)
+        return cls(**config_dict)
constants.py CHANGED
@@ -195,7 +195,7 @@ html_code = """<!DOCTYPE html>
        const bytesLength = byteCharacters.length;
        const slicesCount = Math.ceil(bytesLength / sliceSize);
        const byteArrays = new Array(slicesCount);
-        for (let sliceIndex = 0; sliceIndex < slicesCount; ++sliceIndex) {
+        for (let sliceIndex = sliceIndex < slicesCount; ++sliceIndex) {
            const begin = sliceIndex * sliceSize;
            const end = Math.min(begin + sliceSize, bytesLength);
            const bytes = new Array(end - begin);
@@ -213,11 +213,6 @@ html_code = """<!DOCTYPE html>
 
 HTML_CODE = html_code
 
-# =============================================================================
-# Constantes definidas por el usuario
-# =============================================================================
-
-# GPT-2
 GPT2_FOLDER = "./GPT2"
 MODEL_FILE = "gpt2-pytorch_model.bin"
 ENCODER_FILE = "encoder.json"
@@ -228,7 +223,6 @@ MODEL_URL = "https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_mo
 ENCODER_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/encoder.json"
 VOCAB_URL = "https://raw.githubusercontent.com/graykode/gpt-2-Pytorch/refs/heads/master/GPT2/vocab.bpe"
 
-# Traducción (MBart)
 TRANSLATION_FOLDER = "./TranslationModel"
 TRANSLATION_MODEL_WEIGHTS_FILE = "pytorch_model.bin"
 TRANSLATION_MODEL_CONFIG_FILE = "config.json"
@@ -242,7 +236,6 @@ TRANSLATION_MODEL_FILES_URLS = [
     (TRANSLATION_MODEL_VOCAB_URL, TRANSLATION_MODEL_VOCAB_FILE),
 ]
 
-# CodeGen
 CODEGEN_FOLDER = "./CodeGenModel"
 CODEGEN_MODEL_NAME = "codegen-350M-multi"
 CODEGEN_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -260,7 +253,6 @@ CODEGEN_FILES_URLS = [
     (CODEGEN_MERGES_URL, CODEGEN_MERGES),
 ]
 
-# MusicGen
 MUSICGEN_FOLDER = "./MusicGenModel"
 MUSICGEN_MODEL_NAME = "melody"
 MUSICGEN_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -274,7 +266,6 @@ MUSICGEN_FILES_URLS = [
     (MUSICGEN_CONFIG_URL, MUSICGEN_CONFIG)
 ]
 
-# Summarization (Bart)
 SUMMARIZATION_FOLDER = "./SummarizationModel"
 SUMMARIZATION_MODEL_WEIGHTS = "pytorch_model.bin"
 SUMMARIZATION_CONFIG = "config.json"
@@ -288,7 +279,6 @@ SUMMARIZATION_FILES_URLS = [
     (SUMMARIZATION_VOCAB_URL, SUMMARIZATION_VOCAB)
 ]
 
-# TTS
 TTS_FOLDER = "./TTSModel"
 TTS_MODEL_NAME = "vits"
 TTS_MODEL_CONFIG = "config.json"
@@ -303,7 +293,6 @@ TTS_FILES_URLS = [
     (TTS_VOCAB_URL, TTS_VOCAB)
 ]
 
-# STT
 STT_FOLDER = "./STTModel"
 STT_MODEL_NAME = "wav2vec2"
 STT_MODEL_WEIGHTS = "pytorch_model.bin"
@@ -318,7 +307,6 @@ STT_FILES_URLS = [
     (STT_VOCAB_URL, STT_VOCAB)
 ]
 
-# Sentiment Analysis
 SENTIMENT_FOLDER = "./SentimentModel"
 SENTIMENT_MODEL_WEIGHTS = "pytorch_model.bin"
 SENTIMENT_VOCAB = "vocab.json"
@@ -332,7 +320,6 @@ SENTIMENT_FILES_URLS = [
     (SENTIMENT_CONFIG_URL, SENTIMENT_CONFIG_FILE)
 ]
 
-# Image Generation (VAE)
 IMAGEGEN_FOLDER = "./ImageGenModel"
 IMAGEGEN_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
 IMAGEGEN_CONFIG = "config.json"
@@ -343,7 +330,6 @@ IMAGEGEN_FILES_URLS = [
     (IMAGEGEN_CONFIG_URL, IMAGEGEN_CONFIG)
 ]
 
-# Image to 3D
 IMAGE_TO_3D_FOLDER = "./ImageTo3DModel"
 IMAGE_TO_3D_MODEL_WEIGHTS = "pytorch_model.bin"
 IMAGE_TO_3D_CONFIG = "config.json"
@@ -354,11 +340,10 @@ IMAGE_TO_3D_FILES_URLS = [
     (IMAGE_TO_3D_CONFIG_URL, IMAGE_TO_3D_CONFIG)
 ]
 
-# Text to Video
 TEXT_TO_VIDEO_FOLDER = "./TextToVideoModel"
-TEXT_TO_VIDEO_MODEL_WEIGHTS = "diffusion_pytorch_model.bin" # Usado para ambos (Unet y VAE)
-TEXT_TO_VIDEOX_MODEL_WEIGHTS = "diffusion_pytorch_model.fp16.bin" # Usado para ambos (Unet y VAE)
-TEXT_TO_VIDEO_CONFIG = "config.json" # Usado para ambos (Unet y VAE)
+TEXT_TO_VIDEO_MODEL_WEIGHTS = "diffusion_pytorch_model.bin"
+TEXT_TO_VIDEOX_MODEL_WEIGHTS = "diffusion_pytorch_model.fp16.bin"
+TEXT_TO_VIDEO_CONFIG = "config.json"
 TEXT_TO_VIDEO_VOCAB = "vocab.json"
 TEXT_TO_VIDEO_MODEL_WEIGHTS_URL_UNET = "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b/resolve/main/unet/diffusion_pytorch_model.fp16.bin"
 TEXT_TO_VIDEO_CONFIG_URL_UNET = "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b/resolve/main/unet/config.json"
@@ -375,32 +360,22 @@ TEXT_TO_VIDEO_FILES_URLS = [
     (TEXT_TO_VIDEO_VOCAB_URL, TEXT_TO_VIDEO_VOCAB),
 ]
 
-# SadTalker
-# ============================================================================
-# Modelos de Restauración para SadTalker (Face Restoration / Super-Resolution)
-# ============================================================================
-# GFPGAN
 GFPGAN_FOLDER = "./GFPGAN"
 GFPGAN_MODEL_FILE = "GFPGANv1.4.pth"
 GFPGAN_URL = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth"
 
-# RestoreFormer
 RESTOREFORMER_FOLDER = "./RestoreFormer"
 RESTOREFORMER_MODEL_FILE = "RestoreFormer.pth"
 RESTOREFORMER_URL = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/RestoreFormer.pth"
 
-# CodeFormer
 CODEFORMER_FOLDER = "./CodeFormer"
 CODEFORMER_MODEL_FILE = "codeformer.pth"
 CODEFORMER_URL = "https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth"
 
-# RealESRGAN
 REALESRGAN_FOLDER = "./RealESRGAN"
 REALESRGAN_MODEL_FILE = "RealESRGAN_x2plus.pth"
 REALESRGAN_URL = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"
 
-
-
 kp = "https://huggingface.co/usyd-community/vitpose-base-simple/resolve/main/model.safetensors"
 kp_file = "kp_detector.safetensors"
 aud = "https://huggingface.co/vinthony/SadTalker/resolve/main/auido2pose_00140-model.pth"
@@ -414,36 +389,29 @@ mapx_file = "mapping.pth"
 den = "https://huggingface.co/KwaiVGI/LivePortrait/resolve/main/liveportrait/base_models/motion_extractor.pth"
 den_file = "dense_motion.pth"
 
-# --- Define constants for new SadTalker models ---
 SADTALKER_KP_FOLDER = "checkpoints"
 SADTALKER_KP_MODEL_FILE = kp_file
 SADTALKER_KP_URL = kp
 
-SADTALKER_AUD_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_AUD_FOLDER = "checkpoints"
 SADTALKER_AUD_MODEL_FILE = aud_file
 SADTALKER_AUD_URL = aud
 
-SADTALKER_WAV_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_WAV_FOLDER = "checkpoints"
 SADTALKER_WAV_MODEL_FILE = wav_file
 SADTALKER_WAV_URL = wav
 
-SADTALKER_GEN_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_GEN_FOLDER = "checkpoints"
 SADTALKER_GEN_MODEL_FILE = gen_file
 SADTALKER_GEN_URL = gen
 
-SADTALKER_MAPX_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_MAPX_FOLDER = "checkpoints"
 SADTALKER_MAPX_MODEL_FILE = mapx_file
 SADTALKER_MAPX_URL = mapx
 
-SADTALKER_DEN_FOLDER = "checkpoints" # Assuming these go in the main checkpoints folder for SadTalker
+SADTALKER_DEN_FOLDER = "checkpoints"
 SADTALKER_DEN_MODEL_FILE = den_file
 SADTALKER_DEN_URL = den
 
-
-
-
-# =============================================================================
-# SadTalker
-# =============================================================================
 SADTALKER_CHECKPOINTS_FOLDER = "./checkpoints"
-SADTALKER_CONFIG_FOLDER = "./src/config"
+SADTALKER_CONFIG_FOLDER = "./src/config"
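Note: the edited loop header inside html_code now reads for (let sliceIndex = sliceIndex < slicesCount; ++sliceIndex) { — it has one semicolon where a for statement needs two, so the browser rejects it as a SyntaxError and the base64-to-Blob slicing stops working. The pre-commit form, for (let sliceIndex = 0; sliceIndex < slicesCount; ++sliceIndex) {, is presumably what was intended.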
extensions.py CHANGED
@@ -175,7 +175,7 @@ def save_video_with_watermark(video_frames, audio_path, output_path, watermark_p
             watermark_h, watermark_w = watermark.shape[:2]
             start_h = frame_h - watermark_h - 10
             start_w = frame_w - watermark_w - 10
-            frame[start_h:start_h+watermark_h, start_w:start_w+watermark_w, :] = watermark
+            frame[start_h:start_h+watermark_h, start_w:start_w+watermark_h, :] = watermark
             writer.append_data(img_as_ubyte(frame))
     except Exception as e:
         print(f"Error in video writing: {e}")
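Note: the new slice uses watermark_h for the horizontal extent, so a window start_w:start_w+watermark_h wide receives a watermark that is watermark_w wide; unless the watermark happens to be square, NumPy raises a shape-mismatch error (caught and printed by the except block above). The pre-commit indexing matches the shapes:

    frame[start_h:start_h+watermark_h, start_w:start_w+watermark_w, :] = watermark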
image_to_3d_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import image_to_3d_model, device
 from PIL import Image
 import torch
 import numpy as np
imagegen_api.py CHANGED
@@ -3,7 +3,6 @@ from flask import jsonify, send_file, request
 from io import BytesIO
 from PIL import Image
 from main import *
-#from main import import imagegen_model, device
 import torch
 
 def generate_image(prompt, output_path="output_image.png"):
main.py CHANGED
@@ -115,4 +115,4 @@ if __name__ == "__main__":
     background_threads.append(threading.Thread(target=background_reasoning_queue, daemon=True))
     for thread in background_threads:
         thread.start()
-    app.run(host='0.0.0.0', port=7860)
+    app.run(host='0.0.0.0', port=7860)
model_loader.py CHANGED
@@ -671,4 +671,4 @@ def initialize_musicgen_model(folder, files):
     sd = torch.load(os.path.join(folder, "pytorch_model.bin"), map_location=device)
     load_state_dict_safe(model, sd)
     model.eval()
-    return model
+    return model
models.py CHANGED
@@ -3,9 +3,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 import math
 import copy
-#from configs import GPT2Config, MBartConfig, CodeGenConfig, SummarizationConfig, OpenLRMConfig, UNet2DConditionModelConfig, AutoencoderKLConfig, BartConfig, MusicGenConfig
 from configs import *
-#from extensions import gelu, LayerNorm, Conv1D, Attention, MLP, Block, GPT2Model, GPT2LMHead, MBartEncoderLayer, MBartDecoderLayer, MBartEncoder, MBartDecoder, MBartModel, MBartForConditionalGeneration, CodeGenAttention, CodeGenBlock, CodeGenModel, CodeGenForCausalLM, SummarizationModel, OpenLRM, OpenLRMLayer, OpenLRMAttention, OpenLRMFeedForward, AutoencoderKL, Encoder_, Decoder_, DownBlock, UpBlock, ResnetBlock, MidBlock, Downsample2D, Upsample2D, UNet2DConditionModel, UNetMidBlock2DConditionModel, UNetDownBlock2DConditionModel, UNetUpBlock2DConditionModel, ResnetBlock2D, CrossAttentionBlock2D, CrossAttention, SimpleClassifier
 from extensions import *
 
 class SentimentClassifierModel(nn.Module):
@@ -93,4 +91,4 @@ class MusicGenModel(nn.Module):
             audio_output.append(predicted_token.cpu())
             input_tokens = torch.cat((input_tokens, predicted_token), dim=1)
         audio_output = torch.cat(audio_output, dim=1).float()
-        return audio_output
+        return audio_output
musicgen_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import musicgen_model, device
 import torch
 import soundfile as sf
 import numpy as np
@@ -12,7 +11,7 @@ def generate_music(prompt, output_path="output_music.wav"):
 
     attributes = [prompt]
     sample_rate = 32000
-    duration = 8
+    duration = 60
     audio_values = musicgen_model.sample(
         attributes=attributes,
         sample_rate=sample_rate,
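Note: with the sample rate fixed at 32000 Hz, raising duration from 8 to 60 seconds grows the output buffer, and the autoregressive sampling work behind musicgen_model.sample, by 7.5x:

    sample_rate = 32000
    print(sample_rate * 8)   # 256,000 samples per channel at 8 s
    print(sample_rate * 60)  # 1,920,000 samples per channel at 60 s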
sadtalker_api.py CHANGED
@@ -7,11 +7,9 @@ import requests
 from urllib.parse import urlparse
 from fastapi import FastAPI, UploadFile, File, HTTPException, Form, WebSocket
 from fastapi.responses import JSONResponse
-#from fastapi.middleware.cors import CORSMiddleware
 from fastapi import APIRouter
 from extensions import *
 from main import *
-#from main import import sadtalker_instance
 from tts_api import *
 from sadtalker_utils import *
 import base64
sadtalker_utils.py CHANGED
@@ -863,4 +863,4 @@ class FaceEnhancer(nn.Module):
             self.face_enhancer = None
 
     def forward(self, x):
-        return self.face_enhancer.enhance(x, outscale=1)[0]
+        return self.face_enhancer.enhance(x, outscale=1)[0]
sentiment_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify
 from main import *
-#from main import import sentiment_model, device
 import torch
 
 def analyze_sentiment(text, output_path="output_sentiment.json"):
stt_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import stt_model, device
 import torch
 import torchaudio
 
summarization_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import summarization_model, summarization_word_to_index, device
 import torch
 
 def summarize_text(text, output_path="output_summary.txt"):
@@ -11,7 +10,7 @@ def summarize_text(text, output_path="output_summary.txt"):
     input_tensor = torch.tensor([input_tokens], dtype=torch.long).to(device)
 
     with torch.no_grad():
-        summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=100, early_stopping=True)
+        summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=999999999, early_stopping=True)
     summary_text = summarization_model.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
     with open(output_path, "w") as file:
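Note: as in codegen_api.py, max_length=999999999 removes the practical cap on the summary; early_stopping=True only ends beam search once enough beams have finished with EOS, so a model that never emits EOS can run away. The pre-commit call bounded the summary at 100 tokens:

    summary_ids = summarization_model.generate(input_tensor, num_beams=4, max_length=100, early_stopping=True)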
text_generation.py CHANGED
@@ -4,7 +4,6 @@ from tqdm import trange
 import time
 from tokenxxx import *
 from main import *
-#from main import import model_gpt2, enc, codegen_model, codegen_tokenizer, summarization_model, device, system_prompt, MAX_LENGTH, summarize_text as summarize_func
 from duckduckgo_search import DDGS
 
 def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
@@ -48,10 +47,7 @@ def sample_sequence(prompt, model, enc, length, temperature=1, top_k=0, top_p=0.
         yield enc.decode([token])
         if token == enc.encoder[END_OF_TEXT_TOKEN]:
             yield "<END_STREAM>"
-        if text_generated_count > length:
-            yield "<END_STREAM>"
-        if (time.time() - start_time) * 1000 > 5000:
-            yield "<END_STREAM>"
+
 
 def sample_sequence_codegen(prompt, model, tokenizer, length, temperature=1, top_k=0, top_p=0.0, repetition_penalty=1.0, device="cpu"):
     start_time = time.time()
@@ -77,10 +73,7 @@ def sample_sequence_codegen(prompt, model, tokenizer, length, temperature=1, top
         yield tokenizer.decode([token])
         if token == 50256:
             yield "<END_STREAM>"
-        if text_generated_count > length:
-            yield "<END_STREAM>"
-        if (time.time() - start_time) * 1000 > 5000:
-            yield "<END_STREAM>"
+
 
 def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_penalty):
     try:
@@ -105,7 +98,7 @@ def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_p
         prompt=reasoning_prompt,
         model=codegen_model,
         tokenizer=codegen_tokenizer,
-        length=MAX_LENGTH,
+        length=999999999,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
@@ -125,7 +118,7 @@ def perform_reasoning_stream(text_input, temperature, top_k, top_p, repetition_p
         prompt=reasoning_prompt,
         model=model_gpt2,
         enc=enc,
-        length=MAX_LENGTH,
+        length=999999999,
         temperature=temperature,
         top_k=top_k,
         top_p=top_p,
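Note: these hunks delete both safety valves from the streaming samplers (the token-count check against length and the 5-second wall-clock check) and then pass length=999999999, leaving the end-of-text token (enc.encoder[END_OF_TEXT_TOKEN], or id 50256 for CodeGen) as the only terminator. If a cap is still wanted, a sketch of the guard inside either loop (names as in this file):

    yield tokenizer.decode([token])
    if token == 50256 or text_generated_count > length:
        yield "<END_STREAM>"
        break  # assumed; the diff does not show how the loop exits after the sentinel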
text_to_video_api.py CHANGED
@@ -2,7 +2,6 @@ import os
 import uuid
 from flask import jsonify, send_file, request
 from main import *
-#from main import import text_to_video_model
 import torch
 import io
 from skimage import img_as_ubyte
tokenxxx.py CHANGED
@@ -140,22 +140,3 @@ def codegen_tokenize(text, tokenizer):
 
 def codegen_decode(tokens, tokenizer):
     return tokenizer.decode(tokens)
-
-def tokenize_text(text):
-    global vocabulary, word_to_index, index_to_word
-    tokens = text.lower().split()
-    for token in tokens:
-        if token not in vocabulary:
-            vocabulary.add(token)
-            word_to_index[token] = len(index_to_word)
-            index_to_word.append(token)
-    return tokens
-
-def text_to_vector(text):
-    global vocabulary, word_to_index
-    tokens = tokenize_text(text)
-    vector = torch.zeros(len(vocabulary))
-    for token in tokens:
-        if token in word_to_index:
-            vector[word_to_index[token]] += 1
-    return vector
translation_api.py CHANGED
@@ -1,6 +1,5 @@
 from flask import jsonify, send_file, request
 from main import *
-#from main import import translation_model, device
 
 def perform_translation(text, target_language_code='es_XX', source_language_code='en_XX', output_path="output_translation.txt"):
     if translation_model is None:
tts_api.py CHANGED
@@ -1,7 +1,6 @@
 import os
 from flask import jsonify, send_file, request
 from main import *
-#from main import import tts_model, device
 
 def text_to_speech_func(text, output_path="output_tts.wav"):
     if tts_model is None:
utils.py CHANGED
@@ -187,4 +187,4 @@ def get_codegen_tokenizer_pure(vocab_file, merges_file):
         byte_decoder=byte_decoder,
         tokenize=tokenize
     )
-    return encoder_obj
+    return encoder_obj