Upload 3 files

Browse files

Files changed (3) hide show

architecture.py +129 -0
config.json +7 -0
generate.py +138 -0

architecture.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import xformers.ops as xops
+class SmallGPT(nn.Module):
+    def __init__(self, vocab_size, d_model=256, n_heads=8, n_layers=6, max_length=128, pad_idx=0):
+        super().__init__()
+        self.d_model = d_model
+        self.max_length = max_length
+        # Embeddings
+        self.token_embedding = nn.Embedding(vocab_size, d_model, padding_idx=pad_idx)
+        self.position_embedding = nn.Embedding(max_length, d_model)
+        # Transformer blocks
+        self.blocks = nn.ModuleList([
+            TransformerBlock(d_model, n_heads) for _ in range(n_layers)
+        ])
+        # Output
+        self.ln_f = nn.LayerNorm(d_model)
+        self.head = nn.Linear(d_model, vocab_size, bias=False)
+        # Init weights
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.03)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.03)
+    def forward(self, x):
+        batch_size, seq_len = x.size()
+        # position indices
+        pos = torch.arange(0, seq_len, dtype=torch.long, device=x.device)
+        pos = pos.unsqueeze(0).expand(batch_size, seq_len)
+        # Embeddings
+        tok_emb = self.token_embedding(x)
+        pos_emb = self.position_embedding(pos)
+        x = tok_emb + pos_emb
+        # Transformer blocks
+        for block in self.blocks:
+            x = block(x)
+        # Final layer norm and projection
+        x = self.ln_f(x)
+        logits = self.head(x)
+        return logits
+class TransformerBlock(nn.Module):
+    def __init__(self, d_model, n_heads):
+        super().__init__()
+        self.ln1 = nn.LayerNorm(d_model)
+        self.attn = CausalSelfAttention(d_model, n_heads)
+        self.ln2 = nn.LayerNorm(d_model)
+        self.mlp = MLP(d_model)
+    def forward(self, x):
+        x = x + self.attn(self.ln1(x))
+        x = x + self.mlp(self.ln2(x))
+        return x
+class CausalSelfAttention(nn.Module):
+    def __init__(self, d_model, n_heads):
+        super().__init__()
+        assert d_model % n_heads == 0
+        self.n_heads = n_heads
+        self.d_model = d_model
+        self.head_dim = d_model // n_heads
+        self.qkv = nn.Linear(d_model, 3 * d_model)
+        self.proj = nn.Linear(d_model, d_model)
+    def forward(self, x):
+        batch, seq_len, d_model = x.size()
+        qkv = self.qkv(x)  # [B, S, 3*D]
+        q, k, v = qkv.chunk(3, dim=-1)
+        # reshape into heads
+        q = q.view(batch, seq_len, self.n_heads, self.head_dim).transpose(1, 2)  # [B, H, S, Hd]
+        k = k.view(batch, seq_len, self.n_heads, self.head_dim).transpose(1, 2)
+        v = v.view(batch, seq_len, self.n_heads, self.head_dim).transpose(1, 2)
+        # flatten for xformers: [B*H, S, Hd]
+        q = q.reshape(batch * self.n_heads, seq_len, self.head_dim)
+        k = k.reshape(batch * self.n_heads, seq_len, self.head_dim)
+        v = v.reshape(batch * self.n_heads, seq_len, self.head_dim)
+        # apply memory-efficient attention with causal mask
+        out = xops.memory_efficient_attention(q, k, v, attn_bias=xops.LowerTriangularMask())
+        # out: [B*H, S, Hd]
+        # reshape back
+        out = out.view(batch, self.n_heads, seq_len, self.head_dim)
+        out = out.transpose(1, 2).contiguous().view(batch, seq_len, d_model)
+        return self.proj(out)
+class MLP(nn.Module):
+    def __init__(self, d_model):
+        super().__init__()
+        self.fc1 = nn.Linear(d_model, 4 * d_model)
+        self.fc2 = nn.Linear(4 * d_model, d_model)
+        self.silu = nn.SiLU()
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.silu(x)
+        x = self.fc2(x)
+        return x
+DEFAULT_CONFIG = {
+    "vocab_size": 24_005,
+    "d_model": 256,
+    "n_heads": 8,
+    "n_layers": 6,
+    "max_length": 128,
+}

config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "vocab_size": 24005,
+    "d_model": 256,
+    "n_heads": 8,
+    "n_layers": 6,
+    "max_length": 128
+}

generate.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import torch
+import sys
+import time
+from architecture import SmallGPT
+from tokenizers import Tokenizer
+def load_tokenizer(path="smptokenizer/tokenizer.json"):
+    tokenizer = Tokenizer.from_file(path)
+    return tokenizer
+def generate_text_streaming(model, tokenizer, start_text, device, max_length=64, temperature=1.0, max_new_tokens=20, repetition_penalty=1.2):
+    """
+    Generates text token by token, yielding each new token.
+    """
+    model.eval()
+    # Encode start text
+    input_ids = tokenizer.encode(start_text).ids
+    generated_ids = []
+    # Print the starting text, and wait for the model to continue
+    print("Generated Sentence:")
+    print(start_text, end="", flush=True)
+    current_ids = input_ids
+    with torch.no_grad():
+        for _ in range(max_new_tokens):
+            # Prepare input (truncate if too long)
+            current_input = current_ids[-max_length+1:] if len(current_ids) >= max_length else current_ids
+            input_tensor = torch.tensor([current_input], dtype=torch.long, device=device)
+            # Get output
+            logits = model(input_tensor)
+            # Get logits for last position
+            next_token_logits = logits[0, -1, :] / temperature
+            # Apply repetition penalty, if needed
+            if repetition_penalty > 1.0:
+                for token_id in set(current_ids):
+                    next_token_logits[token_id] /= repetition_penalty
+            # Sample next token
+            probs = torch.softmax(next_token_logits, dim=-1)
+            next_token_id = torch.multinomial(probs, 1).item()
+            # Check for EOS
+            if next_token_id == tokenizer.token_to_id("<eos>"):
+                break
+            generated_ids.append(next_token_id)
+            current_ids.append(next_token_id)
+            # Decode and yield the new token
+            new_token = tokenizer.decode([next_token_id])
+            yield new_token
+def main(seed):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+    # Load tokenizer
+    tokenizer_path = "smptokenizer/tokenizer.json"
+    tokenizer = load_tokenizer(tokenizer_path)
+    vocab_size = tokenizer.get_vocab_size()
+    pad_id = tokenizer.token_to_id("<pad>") or 0
+    # Model parameters from training
+    d_model = 256
+    n_heads = 8
+    n_layers = 6
+    max_length = 172
+    # Instantiate the model
+    model = SmallGPT(
+        vocab_size=vocab_size,
+        d_model=d_model,
+        n_heads=n_heads,
+        n_layers=n_layers,
+        max_length=max_length,
+        pad_idx=pad_id,
+    ).to(device)
+    # Load the trained model weights
+    model_path = "models/pytorch_model.bin" # idk if safetensor works
+    try:
+        model.load_state_dict(torch.load(model_path, map_location=device))
+        model.eval()
+        print(f"Model loaded from {model_path}")
+    except FileNotFoundError:
+        print(f"Error: Model file not found at {model_path}")
+        print("Please ensure the model is trained and the path is correct.")
+        return
+    while True:
+        # Reset seed
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed(seed)
+        start_text = input("Enter a starting word or phrase (or 'quit' to exit): ")
+        if start_text.lower() == 'quit':
+            break
+        if not start_text.strip():
+            print("Please enter some text. We are using a random character as a starting point.")
+            start_text = str(time.time())
+        print("Generating...")
+        token_count = 0
+        start_time = time.time()
+        for token in generate_text_streaming(
+            model=model,
+            tokenizer=tokenizer,
+            start_text=start_text,
+            device=device,
+            max_new_tokens=1000,
+            temperature=0.7,
+            max_length=max_length,
+            repetition_penalty=1.2
+        ):
+            print(token, end="", flush=True)
+            token_count += 1
+        end_time = time.time()
+        elapsed_time = end_time - start_time
+        tokens_per_sec = token_count / elapsed_time if elapsed_time > 0 else 0
+        print(f"\n\nPerformance: {tokens_per_sec:.2f} tokens/sec")
+        print("-" * 30)
+if __name__ == "__main__":
+    seed = 42
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    main(seed)