# 1. Imports and data processing

Import all necessary libraries

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

Load the training data

In [2]:
# Load the training array from a path relative to the notebook's working directory.
# Later cells slice it as a 2-D array: the first CONTEXT_SIZE columns are the
# inputs and column CONTEXT_SIZE is the target.
data = np.load("./data.npy")

Define constants that describe the data and model

In [3]:
# --- Data description -------------------------------------------------------
# Number of preceding letters the model sees for each prediction.
CONTEXT_SIZE = 10

# Lowercase Latin letters "a" through "z", in order.
ALPHABET = [chr(code) for code in range(ord("a"), ord("z") + 1)]
ALPHABET_SIZE = len(ALPHABET)

# One extra vocabulary slot is reserved for unknown characters.
VOCAB_SIZE = ALPHABET_SIZE + 1

# --- Model shape ------------------------------------------------------------
EMBEDDING_DIM = 10
INPUT_SEQ_LEN = CONTEXT_SIZE
OUTPUT_SIZE = VOCAB_SIZE

# --- Training setup ---------------------------------------------------------
# Fraction of the samples used for training; the remainder is held out.
TRAINING_DATA_SIZE = 0.9
BATCH_SIZE = 2048
EPOCHS = 30
LEARNING_RATE = 0.001

Process the data

In [4]:
# Every row must provide CONTEXT_SIZE input columns plus one target column.
assert data.ndim == 2 and data.shape[1] > CONTEXT_SIZE, (
    f"expected a 2-D array with at least {CONTEXT_SIZE + 1} columns, got shape {data.shape}"
)

# Input: indices of the previous CONTEXT_SIZE letters (integer ids, not vectors;
# the model's nn.Embedding layer looks the vectors up)
# shape: (num_samples, CONTEXT_SIZE)
X = data[:, :CONTEXT_SIZE]

# Target: current letter index
# shape: (num_samples,)
y = data[:, CONTEXT_SIZE]

# Torch dataset (important: use long/int64 for indices)
X_tensor = torch.tensor(X, dtype=torch.long)   # for nn.Embedding
y_tensor = torch.tensor(y, dtype=torch.long)   # for classification target

dataset = TensorDataset(X_tensor, y_tensor)

train_len = int(TRAINING_DATA_SIZE * len(dataset))
test_len = len(dataset) - train_len

# Seed the split with a dedicated generator so the same samples land in the
# train and test sets on every run; otherwise the held-out accuracy is not
# comparable between runs of the notebook.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_set, test_set = random_split(dataset, [train_len, test_len], generator=split_generator)

# Only the training batches are reshuffled each epoch; evaluation order is fixed.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE)

# 2. Model

In [5]:
class Logistic(nn.Module):
    """Embedding lookup followed by a single linear layer.

    Each input index is mapped to an embedding vector, the vectors of one
    sample are concatenated, and one linear layer turns the result into
    ``output_shape`` unnormalised scores (logits).
    """

    def __init__(self, *, embedding_count: int, embedding_dimension_size: int, context_size: int, output_shape: int):
        """Create the embedding table and the linear layer.

        Args:
            embedding_count: Number of rows in the embedding table.
            embedding_dimension_size: Length of each embedding vector.
            context_size: Number of indices per input sample.
            output_shape: Number of logits produced per sample.
        """
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=embedding_count, embedding_dim=embedding_dimension_size)
        self.linear = nn.Linear(context_size * embedding_dimension_size, output_shape)

    def forward(self, x):
        """Return logits of shape (batch, output_shape) for indices ``x`` of shape (batch, context_size)."""
        embedded = self.embedding(x)               # (batch, context_size, embedding_dim)
        # flatten(start_dim=1) keeps the batch axis and merges the rest. Unlike
        # view(batch, -1), it also works for an empty batch, where -1 cannot be
        # inferred from zero elements.
        flattened = embedded.flatten(start_dim=1)  # (batch, context_size * embedding_dim)
        return self.linear(flattened)              # (batch, output_shape)

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


  return torch._C._cuda_getDeviceCount() > 0


# 3. Training

Create a fresh instance of the model

In [7]:
# Build an untrained model sized from the notebook constants, then move its
# parameters to the selected device.
model = Logistic(
    embedding_count=VOCAB_SIZE,                # e.g., 27 for a-z + unknown
    embedding_dimension_size=EMBEDDING_DIM,    # e.g., 10
    context_size=CONTEXT_SIZE,                 # e.g., 10
    output_shape=OUTPUT_SIZE,                  # e.g., 27 (next character)
)
model = model.to(device)

In [8]:
# Adam updates every parameter of the model with the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Cross-entropy between the model's logits and the integer class targets.
criterion = nn.CrossEntropyLoss()

In [9]:
for epoch in range(EPOCHS):
    # Switch to training mode and reset the running totals for this epoch.
    model.train()
    total_loss, correct, total = 0, 0, 0

    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)

        # One optimisation step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_X)                # shape: (batch, OUTPUT_SIZE)
        loss = criterion(logits, batch_y)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so the epoch average is per
        # sample even when the last batch is smaller.
        n_samples = batch_X.size(0)
        total_loss += loss.item() * n_samples
        total += n_samples

        # Training accuracy uses the logits computed before this step's update.
        preds = logits.argmax(dim=1)
        correct += preds.eq(batch_y).sum().item()

    avg_loss = total_loss / total
    accuracy = correct / total * 100
    print(f"[Epoch {epoch+1}] - Loss: {avg_loss:.4f} | Accuracy: {accuracy:.2f}%")


[Epoch 1] - Loss: 2.5968 | Accuracy: 23.06%
[Epoch 2] - Loss: 2.3218 | Accuracy: 30.02%
[Epoch 3] - Loss: 2.2600 | Accuracy: 31.25%
[Epoch 4] - Loss: 2.2325 | Accuracy: 31.55%
[Epoch 5] - Loss: 2.2171 | Accuracy: 31.75%
[Epoch 6] - Loss: 2.2076 | Accuracy: 31.98%
[Epoch 7] - Loss: 2.2006 | Accuracy: 32.22%
[Epoch 8] - Loss: 2.1962 | Accuracy: 32.36%
[Epoch 9] - Loss: 2.1925 | Accuracy: 32.42%
[Epoch 10] - Loss: 2.1900 | Accuracy: 32.48%
[Epoch 11] - Loss: 2.1876 | Accuracy: 32.54%
[Epoch 12] - Loss: 2.1859 | Accuracy: 32.64%
[Epoch 13] - Loss: 2.1847 | Accuracy: 32.65%
[Epoch 14] - Loss: 2.1833 | Accuracy: 32.76%
[Epoch 15] - Loss: 2.1821 | Accuracy: 32.75%
[Epoch 16] - Loss: 2.1813 | Accuracy: 32.74%
[Epoch 17] - Loss: 2.1806 | Accuracy: 32.84%
[Epoch 18] - Loss: 2.1799 | Accuracy: 32.81%
[Epoch 19] - Loss: 2.1792 | Accuracy: 32.80%
[Epoch 20] - Loss: 2.1786 | Accuracy: 32.81%
[Epoch 21] - Loss: 2.1780 | Accuracy: 32.77%
[Epoch 22] - Loss: 2.1776 | Accuracy: 32.85%
[Epoch 23] - Loss: 

In [10]:
# Evaluate on the held-out set: count how often the true letter is the top
# prediction, among the top 3, and among the top 5.
model.eval()
correct_top1 = 0
correct_top3 = 0
correct_top5 = 0
total = 0

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)

        # Indices of the 5 highest-scoring classes per sample, best first.
        top_preds = outputs.topk(5, dim=1).indices   # (batch, 5)

        # hits[i, k] is True when the (k+1)-th ranked class of sample i is the
        # target. Comparing the whole batch at once avoids a Python loop with
        # one tensor operation per sample.
        hits = top_preds.eq(batch_y.unsqueeze(1))    # (batch, 5)

        correct_top1 += hits[:, :1].any(dim=1).sum().item()
        correct_top3 += hits[:, :3].any(dim=1).sum().item()
        correct_top5 += hits.any(dim=1).sum().item()
        total += batch_y.size(0)

top1_acc = correct_top1 / total
top3_acc = correct_top3 / total
top5_acc = correct_top5 / total

print(f"Top 1 prediction accuracy: {(top1_acc * 100):.2f}%")
print(f"Top 3 prediction accuracy: {(top3_acc * 100):.2f}%")
print(f"Top 5 prediction accuracy: {(top5_acc * 100):.2f}%")

Top 1 prediction accuracy: 32.45%
Top 3 prediction accuracy: 58.55%
Top 5 prediction accuracy: 72.66%
