Ffftdtd5dtft committed on
Commit
6226435
verified
1 Parent(s): 09cdcf1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +276 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.utils.data import DataLoader, Dataset
5
+ from torch.optim import AdamW
6
+ import matplotlib.pyplot as plt
7
+ import matplotlib.animation as animation
8
+ import time
9
+ import threading
10
+ from tqdm import tqdm
11
+ from transformers import AutoTokenizer, AutoModel, TrainingArguments, pipeline
12
+ from diffusers import DiffusionPipeline
13
+ from huggingface_hub import login, HfApi, Repository
14
+ from dotenv import load_dotenv
15
+
16
+ # Load environment variables (such as HUGGINGFACE_TOKEN) from a .env file
17
+ load_dotenv()
18
+
19
class UnifiedModel(nn.Module):
    """Concatenate the first-token features of several encoders and classify them.

    Only members that are ``nn.Module`` instances exposing
    ``config.hidden_size`` are registered in ``self.models`` and used by
    ``forward``. Every other object passed in (for example inference
    pipelines, which are not ``nn.Module`` subclasses and therefore cannot be
    stored in an ``nn.ModuleList``) is kept in the plain list
    ``self.pipelines`` and takes no part in the forward pass, so the
    classifier width always matches the features that are actually produced.

    Args:
        models: iterable of encoder modules and/or arbitrary other objects.
    """

    def __init__(self, models):
        super().__init__()
        encoders = []
        others = []
        for candidate in models:
            is_encoder = (
                isinstance(candidate, nn.Module)
                and hasattr(candidate, "config")
                and hasattr(candidate.config, "hidden_size")
            )
            (encoders if is_encoder else others).append(candidate)

        self.models = nn.ModuleList(encoders)
        # Plain list on purpose: these objects are not trainable sub-modules.
        self.pipelines = others
        feature_dim = sum(encoder.config.hidden_size for encoder in encoders)
        self.classifier = nn.Linear(feature_dim, 2)

    def _per_model(self, value, label):
        """Return one entry of *value* per encoder.

        A list/tuple must hold exactly one entry per encoder; anything else
        (a single tensor or ``None``) is shared by all encoders.
        """
        if isinstance(value, (list, tuple)):
            if len(value) != len(self.models):
                raise ValueError(
                    f"{label} has {len(value)} entries but the model holds "
                    f"{len(self.models)} encoders"
                )
            return list(value)
        return [value] * len(self.models)

    def forward(self, inputs, attention_mask=None):
        """Compute 2-way logits from the concatenated encoder features.

        Args:
            inputs: either one tensor fed to every encoder, or a list/tuple
                with one tensor per encoder (in registration order).
            attention_mask: optional mask, shared or per encoder in the same
                way as ``inputs``; omitted from the encoder call when ``None``.

        Returns:
            The classifier output for the concatenated
            ``last_hidden_state[:, 0, :]`` slices of all encoders.

        Raises:
            ValueError: if no encoder is registered, or a per-encoder
                list/tuple has the wrong length.
        """
        if len(self.models) == 0:
            raise ValueError("UnifiedModel holds no encoder to run")

        ids_per_model = self._per_model(inputs, "inputs")
        mask_per_model = self._per_model(attention_mask, "attention_mask")

        hidden_states = []
        for encoder, ids, mask in zip(self.models, ids_per_model, mask_per_model):
            if mask is None:
                outputs = encoder(ids)
            else:
                outputs = encoder(ids, attention_mask=mask)
            # Keep only the hidden state at sequence position 0.
            hidden_states.append(outputs.last_hidden_state[:, 0, :])

        concatenated_hidden_states = torch.cat(hidden_states, dim=-1)
        return self.classifier(concatenated_hidden_states)
37
+
38
+
39
class SyntheticDataset(Dataset):
    """In-memory dataset that repeats one tokenized sentence ``size`` times.

    Every item is a dict with the keys ``"text"``, ``"label"`` (a scalar
    tensor holding 0) and, for each entry ``name`` of ``tokenizers``,
    ``"input_ids_<name>"`` and ``"attention_mask_<name>"``.

    Args:
        tokenizers: mapping of name -> callable tokenizer. Each tokenizer is
            called as ``tokenizer(text, padding="max_length",
            truncation=True, max_length=max_length)`` and must return a
            mapping with ``"input_ids"`` and ``"attention_mask"``.
        size: number of items to generate.
        text: the sentence stored in, and tokenized for, every item.
        max_length: value forwarded to each tokenizer as ``max_length``.
    """

    def __init__(
        self,
        tokenizers,
        size=100,
        text="This is a sample sentence for testing purposes.",
        max_length=128,
    ):
        self.tokenizers = tokenizers
        self.size = size
        self.text = text
        self.max_length = max_length
        self.data = self._generate_data()

    def _generate_data(self):
        """Build the list of items, tokenizing the sentence once per tokenizer."""
        # The text is identical for every item, so tokenize it a single time
        # per tokenizer instead of once per generated sample.
        encoded = {}
        for name, tokenizer in self.tokenizers.items():
            tokenized = tokenizer(
                self.text,
                padding="max_length",
                truncation=True,
                max_length=self.max_length,
            )
            encoded[f"input_ids_{name}"] = torch.tensor(tokenized["input_ids"])
            encoded[f"attention_mask_{name}"] = torch.tensor(tokenized["attention_mask"])

        data = []
        for _ in range(self.size):
            item = {"text": self.text, "label": torch.tensor(0)}  # Sample label
            # Clone so that items never share tensor storage with each other.
            item.update((key, tensor.clone()) for key, tensor in encoded.items())
            data.append(item)
        return data

    def __len__(self):
        """Return the number of generated items."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the item dict stored at position ``idx``."""
        return self.data[idx]
63
+
64
+
65
def push_to_hub(local_dir, repo_name):
    """Commit everything in ``local_dir`` and push it to a Hub repository.

    The repository ``repo_name`` is created if it does not exist yet, cloned
    into ``local_dir`` through ``Repository``, then all files are staged (with
    automatic LFS tracking), committed and pushed.

    This is best-effort: any exception is caught and reported on stdout, and
    the function returns ``None`` either way.

    Args:
        local_dir: working directory handed to ``Repository``.
        repo_name: Hub repository id passed to ``HfApi.create_repo``.
    """
    try:
        repo_url = HfApi().create_repo(repo_name, exist_ok=True)
        repo = Repository(local_dir, clone_from=repo_url)

        # No manual `git init` fallback is attempted here: the `Repository`
        # call above either sets up the working copy or raises, so such a
        # branch could never run (and it built a shell command from unquoted
        # paths).

        # Stage the whole working tree BEFORE committing. Config/tokenizer
        # JSON files are covered by the "." pattern, so they need no separate
        # staging step (staging them after the commit never reached it).
        repo.git_add(pattern=".", auto_lfs_track=True)
        repo.git_commit("Add model and tokenizer files")
        repo.git_push()
        print(f"Pushed model and tokenizer to {repo_url}")
    except Exception as e:
        print(f"Error pushing to Hugging Face Hub: {e}")
86
+
87
+
88
def main(max_attempts=None):
    """Load all models, train the unified classifier, plot losses and push.

    One attempt performs, in order: Hub login, loading of every pipeline and
    every tokenizer/encoder pair, creation of a synthetic dataset with an
    80/20 train/validation split, training of ``UnifiedModel`` on CPU with
    ``AdamW``, an animated loss plot, and a call to ``push_to_hub``.

    Any ``Exception`` raised during an attempt is printed and the whole
    attempt is restarted after a 2 second pause.

    Args:
        max_attempts: maximum number of attempts before giving up. ``None``
            (the default) retries without limit.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            os.system("git config --global credential.helper store")
            login(token=os.getenv("HUGGINGFACE_TOKEN"), add_to_git_credential=True)

            # Checkpoints loaded as tokenizer + encoder pairs.
            models_to_train = [
                "openai-community/gpt2-xl",
                "google/gemma-2-9b-it",
                "google/gemma-2-9b",
                "meta-llama/Meta-Llama-3.1-8B-Instruct",
                "meta-llama/Meta-Llama-3.1-8B",
                "openbmb/MiniCPM-V-2_6",
                "bigcode/starcoder",
                "WizardLMTeam/WizardCoder-Python-34B-V1.0",
                "Qwen/Qwen2-72B-Instruct",
                "google/gemma-2-2b-it",
                "facebook/bart-large-cnn",
                "Falconsai/text_summarization",
                "microsoft/speecht5_tts",
                "Groq/Llama-3-Groq-70B-Tool-Use",
                "Groq/Llama-3-Groq-8B-Tool-Use",
            ]

            # Initialise the pipelines.
            # NOTE(review): "black-forest-labs/FLUX.1-dev" and
            # "facebook/musicgen-small" are each loaded twice below — confirm
            # whether the duplicates are intended.
            pipelines_to_unify = [
                pipeline("text-to-audio", model="facebook/musicgen-melody"),
                pipeline("text-to-audio", model="facebook/musicgen-large"),
                pipeline("text-to-audio", model="facebook/musicgen-small"),
                DiffusionPipeline.from_pretrained("stabilityai/stable-video-diffusion-img2vid-xt-1-1"),
                pipeline("automatic-speech-recognition", model="openai/whisper-small"),
                DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev"),
                DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1"),
                DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell"),
                pipeline("text-generation", model="meta-llama/Meta-Llama-3.1-8B"),
                pipeline("text-generation", model="openbmb/MiniCPM-V-2_6"),
                pipeline("text-generation", model="bigcode/starcoder"),
                pipeline("text-to-speech", model="microsoft/speecht5_tts"),
                pipeline("text-generation", model="WizardLMTeam/WizardCoder-Python-34B-V1.0"),
                pipeline("text-generation", model="Qwen/Qwen2-72B-Instruct"),
                pipeline("text-generation", model="google/gemma-2-2b-it"),
                pipeline("summarization", model="facebook/bart-large-cnn"),
                pipeline("summarization", model="Falconsai/text_summarization"),
                DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev"),
                pipeline("text-to-audio", model="facebook/musicgen-small"),
                pipeline("text-generation", model="Groq/Llama-3-Groq-70B-Tool-Use"),
                pipeline("text-generation", model="Groq/Llama-3-Groq-8B-Tool-Use"),
            ]

            tokenizers = {}
            models = []
            for model_name in models_to_train:
                tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

                # Padding to max_length needs a pad token; reuse EOS if absent.
                if tokenizer.pad_token is None:
                    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

                tokenizers[model_name] = tokenizer
                models.append(AutoModel.from_pretrained(model_name))

            # Add the pipelines to the list of models.
            models.extend(pipelines_to_unify)

            # Create a synthetic dataset for training and evaluation.
            synthetic_dataset = SyntheticDataset(tokenizers, size=100)

            # Split the dataset into training and evaluation parts.
            train_size = int(0.8 * len(synthetic_dataset))
            val_size = len(synthetic_dataset) - train_size
            train_dataset, val_dataset = torch.utils.data.random_split(
                synthetic_dataset, [train_size, val_size]
            )

            # Create the DataLoaders for training and evaluation.
            train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
            eval_loader = DataLoader(val_dataset, batch_size=16)

            # Combine the models and pipelines into a single model.
            unified_model = UnifiedModel(models)
            unified_model.to(torch.device("cpu"))

            # Show the total number of parameters to train.
            total_params = sum(p.numel() for p in unified_model.parameters())
            print(f"Total parameters to train: {total_params}")

            # Training arguments. Within this function only learning_rate,
            # num_train_epochs and logging_steps are read back from them.
            # NOTE(review): newer transformers releases rename
            # `evaluation_strategy` to `eval_strategy` — confirm against the
            # installed version.
            training_args = TrainingArguments(
                output_dir="outputs/unified_model",
                evaluation_strategy="epoch",
                learning_rate=9e-4,
                per_device_train_batch_size=2,
                per_device_eval_batch_size=16,
                num_train_epochs=1,  # Reduced epochs for quick training
                weight_decay=0.01,
                logging_steps=10,  # More frequent logging for quicker feedback
                optim="adamw_hf",
            )

            # Define the optimizer.
            optimizer = AdamW(unified_model.parameters(), lr=training_args.learning_rate)

            train_losses = []
            eval_losses = []
            criterion = nn.CrossEntropyLoss()  # built once, reused every step

            def batch_inputs(batch):
                """Return the per-tokenizer input id tensors and the labels."""
                input_ids = [batch[f"input_ids_{name}"].to("cpu") for name in tokenizers]
                return input_ids, batch["label"].to("cpu")

            def train(model, train_loader, eval_loader, args):
                """Run the training epochs, evaluating after each one."""
                model.train()
                epoch = 0
                global_step = 0
                total_steps = args.num_train_epochs * len(train_loader)
                progress_bar = tqdm(total=total_steps, desc="Training")
                start_time = time.time()

                while epoch < args.num_train_epochs:
                    # Evaluation below switches to eval mode; switch back.
                    model.train()
                    for step, batch in enumerate(train_loader):
                        input_ids, labels = batch_inputs(batch)
                        optimizer.zero_grad()
                        outputs = model(input_ids)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        progress_bar.update(1)

                        # The ETA is based on steps completed over the whole
                        # run, so it stays meaningful across epochs.
                        elapsed_time = time.time() - start_time
                        steps_done = global_step + 1
                        estimated_remaining_time = (total_steps - steps_done) * (elapsed_time / steps_done)

                        if step % args.logging_steps == 0:
                            train_losses.append(loss.item())
                            print(f"Step {global_step}/{total_steps}, Loss: {loss.item()}, Estimated remaining time: {estimated_remaining_time:.2f} seconds")

                        global_step += 1

                    epoch += 1
                    model.eval()
                    eval_loss = 0
                    with torch.no_grad():
                        for batch in eval_loader:
                            input_ids, labels = batch_inputs(batch)
                            outputs = model(input_ids)
                            eval_loss += criterion(outputs, labels).item()

                    # Guard against an empty evaluation loader.
                    eval_loss /= max(len(eval_loader), 1)
                    eval_losses.append(eval_loss)
                    print(f"Epoch {epoch}/{args.num_train_epochs}, Evaluation Loss: {eval_loss}")

                progress_bar.close()

            train(unified_model, train_loader, eval_loader, training_args)

            # Visualise the losses recorded during training.
            fig, ax = plt.subplots()

            def animate(i):
                """Draw the losses recorded up to and including frame ``i``."""
                ax.clear()
                # ax.clear() also wipes the axis labels, so set them per frame.
                ax.set_xlabel("Epochs")
                ax.set_ylabel("Loss")
                # i + 1 so that the final frame includes the last data point.
                ax.plot(train_losses[: i + 1], label="Train Loss")
                ax.plot(eval_losses[: i + 1], label="Eval Loss")
                ax.legend()

            # Keep a reference so the animation is not garbage collected.
            ani = animation.FuncAnimation(fig, animate, frames=len(train_losses), blit=False)
            plt.show()

            # Upload the unified model to the Hugging Face Hub.
            # NOTE(review): nothing in this function writes model or tokenizer
            # files into local_dir before the push — confirm whether a save
            # step is missing.
            local_dir = "./outputs/unified_model"
            push_to_hub(local_dir, repo_name="Ffftdtd5dtft/my_model")

            break
        except Exception as e:
            print(f"Error: {e}")
            if max_attempts is not None and attempt >= max_attempts:
                print(f"Giving up after {attempt} attempts")
                break
            time.sleep(2)
260
+
261
+
262
+
263
def gradio_app():
    """Build a Gradio Blocks page with a Markdown heading and launch it."""
    # `gr` is not imported anywhere in the visible part of this file, so the
    # original raised NameError here; import it locally.
    import gradio as gr

    with gr.Blocks() as app:
        gr.Markdown(
            """
            # IA Generativa con Transformers y Diffusers
            Explora diferentes modelos de IA para generar texto, imágenes, audio, video y más.
            """
        )
    app.launch()
272
+
273
+
274
if __name__ == "__main__":
    # gradio_app() ends in app.launch(), which is expected to block while the
    # UI is being served, so calling main() after it would postpone training
    # until the server stops. Run training on a background thread instead and
    # keep the UI on the main thread.
    # NOTE(review): main() calls plt.show() off the main thread — confirm this
    # is acceptable for the matplotlib backend in use.
    training_thread = threading.Thread(target=main, name="training", daemon=True)
    training_thread.start()
    gradio_app()
    # If the UI returns, still wait for training to finish before exiting.
    training_thread.join()