xcx0902 committed on
Commit
493e08a
·
verified ·
1 Parent(s): f78eced

Upload folder using huggingface_hub

Browse files
SimpleRNN.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class SimpleRNN(nn.Module):
    """Single-layer RNN over one-hot encoded indices with a linear output head.

    The model one-hot encodes integer indices, runs them through an
    ``nn.RNN`` (``batch_first=True``) and maps the output of the last time
    step through a linear layer.

    Args:
        input_size: Number of classes used for the one-hot encoding, which is
            also the feature size fed to the RNN.
        hidden_size: Size of the RNN hidden state.
        output_size: Number of values produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the RNN over ``x`` and project the last time step.

        Args:
            x: Integer index tensor, either 1-D ``(seq_len,)`` for a single
                sequence or 2-D ``(batch, seq_len)`` for a batch of
                sequences. Values must be valid one-hot indices, i.e. in
                ``[0, input_size)``.
            hidden: Initial hidden state of shape ``(1, batch, hidden_size)``
                (``batch`` is 1 for 1-D input). When ``None``, ``nn.RNN``
                starts from an all-zero state.

        Returns:
            A tuple ``(out, hidden)`` where ``out`` has shape
            ``(batch, output_size)`` and ``hidden`` is the final RNN state.
        """
        # Read the width from the RNN layer rather than ``self.input_size`` so
        # that forward also works on instances that were restored without
        # that attribute (it is a plain attribute set only in ``__init__``).
        num_classes = self.rnn.input_size
        encoded = torch.nn.functional.one_hot(x, num_classes=num_classes).float()

        # A single sequence encodes to (seq_len, classes); add the batch axis
        # the batch_first RNN expects. Already-batched input is left as is.
        if encoded.dim() == 2:
            encoded = encoded.unsqueeze(0)

        out, hidden = self.rnn(encoded, hidden)
        out = self.fc(out[:, -1, :])  # Take last time step's output
        return out, hidden
__pycache__/SimpleRNN.cpython-311.pyc ADDED
Binary file (1.73 kB). View file
 
parameter.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "hidden_size": 2048,
3
  "sequence_length": 5,
4
- "learning_rate": 0.0001,
5
  "model_path": "tiny_llm_hidden2048.pth"
6
  }
 
1
  {
2
  "hidden_size": 2048,
3
  "sequence_length": 5,
4
+ "learning_rate": 0.0002,
5
  "model_path": "tiny_llm_hidden2048.pth"
6
  }
run.py CHANGED
@@ -1,26 +1,11 @@
1
  import torch
2
- import torch.nn as nn
3
  import json
4
  from tqdm import tqdm, trange
5
 
6
- # Model parameters
7
  parameters = json.loads(open("parameter.json").read())
8
  model_path = parameters["model_path"]
9
 
10
- # Define the simple RNN model
11
- class SimpleRNN(nn.Module):
12
- def __init__(self, input_size, hidden_size, output_size):
13
- super(SimpleRNN, self).__init__()
14
- self.hidden_size = hidden_size
15
- self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
16
- self.fc = nn.Linear(hidden_size, output_size)
17
-
18
- def forward(self, x, hidden):
19
- x = torch.nn.functional.one_hot(x, num_classes=input_size).float()
20
- out, hidden = self.rnn(x.unsqueeze(0), hidden)
21
- out = self.fc(out[:, -1, :]) # Take last time step's output
22
- return out, hidden
23
-
24
  model = torch.load(model_path, weights_only=False)
25
  with open("vocab.json", "r") as f:
26
  chars = json.loads(f.read())
@@ -32,7 +17,6 @@ input_size = len(chars)
32
  hidden_size = parameters["hidden_size"]
33
  output_size = len(chars)
34
 
35
- # Text generation function
36
  def generate_text(start_text, length):
37
  model.eval()
38
  hidden = torch.zeros(1, 1, hidden_size)
@@ -47,7 +31,6 @@ def generate_text(start_text, length):
47
 
48
  return generated_text
49
 
50
- # Generate some text
51
  while True:
52
  prompt = input("Ask LLM: ")
53
  length = int(input("Length of text: "))
 
1
  import torch
2
+ from SimpleRNN import SimpleRNN
3
  import json
4
  from tqdm import tqdm, trange
5
 
 
6
  parameters = json.loads(open("parameter.json").read())
7
  model_path = parameters["model_path"]
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  model = torch.load(model_path, weights_only=False)
10
  with open("vocab.json", "r") as f:
11
  chars = json.loads(f.read())
 
17
  hidden_size = parameters["hidden_size"]
18
  output_size = len(chars)
19
 
 
20
  def generate_text(start_text, length):
21
  model.eval()
22
  hidden = torch.zeros(1, 1, hidden_size)
 
31
 
32
  return generated_text
33
 
 
34
  while True:
35
  prompt = input("Ask LLM: ")
36
  length = int(input("Length of text: "))
tiny_llm_hidden128.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc68ba1d2ccf1481ef6f369ecd6a5e8a402fffbd5d55233b3053964421ed0c0
3
+ size 134326
tiny_llm_hidden2048_legacy.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2bca2e08a529c052f0a8867dc6a6096c7c5efeba3e75865238932611fec3c63
3
+ size 17813696
tiny_llm_hidden512_legacy.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b62f88c702e3d9d06394d079547ac58b37b2b70f4135aec363cdff9100e54d9
3
+ size 1310902
train.py CHANGED
@@ -1,18 +1,17 @@
1
  import torch
2
  import torch.nn as nn
3
  import torch.optim as optim
 
4
  import os
5
  import json
6
  from tqdm import tqdm, trange
7
  import time
8
 
9
- # Generate simple training data
10
  training_text = open("train_data.txt", encoding="utf-8").read()
11
  chars = sorted(list(set(training_text))) # Unique characters
12
  char_to_idx = {ch: i for i, ch in enumerate(chars)}
13
  idx_to_char = {i: ch for i, ch in enumerate(chars)}
14
 
15
- # Model parameters
16
  parameters = json.loads(open("parameter.json").read())
17
  input_size = len(chars)
18
  hidden_size = parameters["hidden_size"]
@@ -22,27 +21,12 @@ epochs = 1000
22
  learning_rate = parameters["learning_rate"]
23
  model_path = parameters["model_path"]
24
 
25
- # Create training data (input-output pairs)
26
  train_data = []
27
  for i in range(len(training_text) - sequence_length):
28
  input_seq = training_text[i : i + sequence_length]
29
  target_char = training_text[i + sequence_length]
30
  train_data.append((torch.tensor([char_to_idx[ch] for ch in input_seq]), char_to_idx[target_char]))
31
 
32
- # Define the simple RNN model
33
- class SimpleRNN(nn.Module):
34
- def __init__(self, input_size, hidden_size, output_size):
35
- super(SimpleRNN, self).__init__()
36
- self.hidden_size = hidden_size
37
- self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
38
- self.fc = nn.Linear(hidden_size, output_size)
39
-
40
- def forward(self, x, hidden):
41
- x = torch.nn.functional.one_hot(x, num_classes=input_size).float()
42
- out, hidden = self.rnn(x.unsqueeze(0), hidden)
43
- out = self.fc(out[:, -1, :]) # Take last time step's output
44
- return out, hidden
45
-
46
  if os.path.exists(model_path):
47
  model = torch.load(model_path, weights_only=False)
48
  print("Loaded pre-trained model. Continue training...")
@@ -77,7 +61,6 @@ for epoch in range(epochs):
77
  hidden = torch.zeros(1, 1, hidden_size)
78
  output, hidden = model(input_seq, hidden.detach())
79
 
80
- # Save the trained model
81
  torch.save(model, model_path)
82
  with open("vocab.json", "w") as f:
83
  f.write(json.dumps(chars))
 
1
  import torch
2
  import torch.nn as nn
3
  import torch.optim as optim
4
+ from SimpleRNN import SimpleRNN
5
  import os
6
  import json
7
  from tqdm import tqdm, trange
8
  import time
9
 
 
10
  training_text = open("train_data.txt", encoding="utf-8").read()
11
  chars = sorted(list(set(training_text))) # Unique characters
12
  char_to_idx = {ch: i for i, ch in enumerate(chars)}
13
  idx_to_char = {i: ch for i, ch in enumerate(chars)}
14
 
 
15
  parameters = json.loads(open("parameter.json").read())
16
  input_size = len(chars)
17
  hidden_size = parameters["hidden_size"]
 
21
  learning_rate = parameters["learning_rate"]
22
  model_path = parameters["model_path"]
23
 
 
24
  train_data = []
25
  for i in range(len(training_text) - sequence_length):
26
  input_seq = training_text[i : i + sequence_length]
27
  target_char = training_text[i + sequence_length]
28
  train_data.append((torch.tensor([char_to_idx[ch] for ch in input_seq]), char_to_idx[target_char]))
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  if os.path.exists(model_path):
31
  model = torch.load(model_path, weights_only=False)
32
  print("Loaded pre-trained model. Continue training...")
 
61
  hidden = torch.zeros(1, 1, hidden_size)
62
  output, hidden = model(input_seq, hidden.detach())
63
 
 
64
  torch.save(model, model_path)
65
  with open("vocab.json", "w") as f:
66
  f.write(json.dumps(chars))