# Omega
Prediction of the next key to be pressed using a multilayer perceptron (MLP).

## 1. Import and load data

### Import all required modules

In [34]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, random_split

### Load data

In [35]:
# Load the sample matrix from disk. Later cells slice it as a 2-D array:
# the first CONTEXT_SIZE columns are the inputs, the next column is the target.
data = np.load("./data.npy")

### Define constants describing the dataset and other useful information

In [36]:
# --- Dataset layout ----------------------------------------------------------
# Number of leading columns of each sample that are fed to the model.
CONTEXT_SIZE = 10
INPUT_SEQ_LEN = CONTEXT_SIZE

# Fraction of the samples assigned to the training split.
TRAINING_DATA_SIZE = 0.9

# --- Vocabulary --------------------------------------------------------------
ALPHABET = [letter for letter in "abcdefghijklmnopqrstuvwxyz"]
ALPHABET_SIZE = len(ALPHABET)
# One extra index on top of the 26 letters is reserved for unknown symbols.
VOCAB_SIZE = 1 + ALPHABET_SIZE

# --- Model dimensions --------------------------------------------------------
EMBEDDING_DIM = 16
# The network emits one score per vocabulary entry.
OUTPUT_SIZE = VOCAB_SIZE

# Define and split data

## Define input and output columns

In [37]:
# Fail early on malformed data: a missing column or an out-of-range index would
# otherwise only surface later as an opaque nn.Embedding / CrossEntropyLoss
# error (on CUDA, a device-side assert).
assert data.ndim == 2 and data.shape[1] > CONTEXT_SIZE, (
    f"expected a 2-D array with at least {CONTEXT_SIZE + 1} columns, got shape {data.shape}"
)

# Inputs: the first CONTEXT_SIZE columns -> shape (num_samples, CONTEXT_SIZE)
X = data[:, :CONTEXT_SIZE]

# Target: the column right after the context -> shape (num_samples,)
y = data[:, CONTEXT_SIZE]

if len(data):
    # Inputs index the embedding table; targets index the output logits.
    assert X.min() >= 0 and X.max() < VOCAB_SIZE, "context index outside [0, VOCAB_SIZE)"
    assert y.min() >= 0 and y.max() < OUTPUT_SIZE, "target index outside [0, OUTPUT_SIZE)"

# Torch dataset (important: use long/int64 for indices)
X_tensor = torch.tensor(X, dtype=torch.long)   # consumed by nn.Embedding
y_tensor = torch.tensor(y, dtype=torch.long)   # class targets for the loss

dataset = TensorDataset(X_tensor, y_tensor)

In [38]:
# Seed the split so the train/test partition -- and therefore every accuracy
# reported further down -- is reproducible across kernel restarts.
SPLIT_SEED = 42

train_len = int(TRAINING_DATA_SIZE * len(dataset))
test_len = len(dataset) - train_len  # remainder, so the two parts always sum up
train_set, test_set = random_split(
    dataset,
    [train_len, test_len],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [39]:
# Both loaders share one batch size; only the training loader reshuffles its
# samples on every pass.
BATCH_SIZE = 1024

train_loader = DataLoader(dataset=train_set, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE)

In [40]:
# Hyperparameter grid explored by the search loop further down.
# Learning rates handed to the Adam optimizer, smallest to largest:
learning_rates = [
    1e-4, 5e-4,
    1e-3, 5e-3,
    1e-2, 5e-2,
]
# Candidate activation classes (instantiated inside the model, not here):
activation_layers = [
    nn.ReLU,
    nn.GELU,
]

## Model and training

To find the best MLP configuration, a grid of hyperparameter combinations is defined.  
The grid covers the **activation layers** and the **learning rates**.

In [32]:
from itertools import product

# The MLP reads exactly two activation classes (index 0 and index 1), so build
# ordered pairs. Tying `repeat` to the number of *candidate* activations only
# worked while there happened to be two candidates: a third candidate would
# have produced triples whose last entry the model never reads, retraining
# every pair several times.
NUM_ACTIVATION_SLOTS = 2
all_activation_combinations = list(product(activation_layers, repeat=NUM_ACTIVATION_SLOTS))

In [66]:
class MLP(nn.Module):
    """Embedding-based multilayer perceptron producing one score per output class.

    Layer order inside ``self.net`` (unchanged, so ``state_dict`` keys stay
    ``net.<index>.*``)::

        Embedding -> Flatten -> Linear(context*emb, 256) -> act[0]
                  -> Linear(256, 128) -> act[1] -> Linear(128, output_size)

    Args:
        activation_layers: Indexable sequence (list or tuple) of activation
            *classes*, not instances, e.g. ``(nn.ReLU, nn.GELU)``. Item 0 is
            instantiated after the first linear layer, item 1 after the
            second; any further items are ignored. Defaults to two ReLUs so
            that ``MLP()`` is a valid call.
        vocab_size: Rows of the embedding table. Defaults to the notebook-level
            ``VOCAB_SIZE``.
        embedding_dim: Width of each embedding vector. Defaults to the
            notebook-level ``EMBEDDING_DIM``.
        context_size: Number of indices per sample. Defaults to the
            notebook-level ``CONTEXT_SIZE``.
        output_size: Number of output scores. Defaults to the notebook-level
            ``OUTPUT_SIZE``.

    Raises:
        IndexError: If ``activation_layers`` holds fewer than two items.
    """

    def __init__(
        self,
        activation_layers=(nn.ReLU, nn.ReLU),
        *,
        vocab_size=None,
        embedding_dim=None,
        context_size=None,
        output_size=None,
    ):
        super().__init__()

        # Resolve notebook-level defaults lazily so explicit overrides never
        # touch the globals at all.
        vocab_size = VOCAB_SIZE if vocab_size is None else vocab_size
        embedding_dim = EMBEDDING_DIM if embedding_dim is None else embedding_dim
        context_size = CONTEXT_SIZE if context_size is None else context_size
        output_size = OUTPUT_SIZE if output_size is None else output_size

        first_activation = activation_layers[0]
        second_activation = activation_layers[1]

        self.net = nn.Sequential(
            nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim),
            # (batch, context, emb) -> (batch, context * emb)
            nn.Flatten(),
            nn.Linear(context_size * embedding_dim, 256),
            first_activation(),
            nn.Linear(256, 128),
            second_activation(),
            nn.Linear(128, output_size),
        )

    def forward(self, x):
        """Map integer indices of shape (batch, context_size) to (batch, output_size) scores."""
        return self.net(x)

In [42]:
# Prefer the GPU whenever CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [55]:
# Placeholder only: the hyperparameter search further down constructs the
# actual MLP instances and rebinds this name.
model = None

Test every combination of activation layers, each with every learning rate.

In [65]:

# Loss shared by every training run; train_model applies it to the raw model
# outputs and the integer class targets.
criterion = nn.CrossEntropyLoss()

In [71]:
def train_model(model, optimizer, epochs=30, loader=None, loss_fn=None):
    """Train ``model`` in place and report the mean batch loss of every epoch.

    Args:
        model: Module to optimise; put into training mode at the start of
            each epoch.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``loader`` (default 30, the value
            that used to be hard-coded).
        loader: Iterable yielding ``(inputs, targets)`` batches. Defaults to
            the notebook-level ``train_loader``.
        loss_fn: Callable ``(outputs, targets) -> scalar loss tensor``.
            Defaults to the notebook-level ``criterion``.

    Returns:
        list[float]: One entry per epoch -- the summed batch losses divided by
        the number of batches (0.0 for an epoch without batches). The function
        previously returned ``None``; callers that ignore the result are
        unaffected.
    """
    if loader is None:
        loader = train_loader
    if loss_fn is None:
        loss_fn = criterion

    # Send batches to wherever the model's parameters live; parameter-free
    # modules fall back to the notebook-level device.
    first_param = next(model.parameters(), None)
    target_device = device if first_param is None else first_param.device

    epoch_losses = []
    for _ in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0
        for batch_X, batch_y in loader:
            batch_X, batch_y = batch_X.to(target_device), batch_y.to(target_device)
            optimizer.zero_grad()
            loss = loss_fn(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # max(..., 1) keeps an empty loader from dividing by zero.
        epoch_losses.append(running_loss / max(num_batches, 1))
    return epoch_losses

# Testing model

In [70]:
def test_model(model) -> tuple[float]:
    model.eval()
    correct_top1 = 0
    correct_top3 = 0
    correct_top5 = 0
    total = 0

    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)

            _, top_preds = outputs.topk(5, dim=1)

            for true, top5 in zip(batch_y, top_preds):
                total += 1
                if true == top5[0]:
                    correct_top1 += 1
                if true in top5[:3]:
                    correct_top3 += 1
                if true in top5:
                    correct_top5 += 1

    top1_acc = correct_top1 / total
    top3_acc = correct_top3 / total
    top5_acc = correct_top5 / total

    return (top1_acc, top3_acc, top5_acc)


In [72]:
# Grid search over activation pairs and learning rates.
#
# The best candidate (ranked by top-1 accuracy) is kept and bound to `model`
# afterwards, because later cells predict with and save `model`. Previously
# `model` simply ended up as the *last* combination trained, whatever its score.
# NOTE(review): the ranking uses the test split, so the reported best score is
# optimistically biased; a separate validation split would be cleaner.
best_model = None
best_results = None
best_config = None

for activation_layer_combination in all_activation_combinations:
    for learning_rate in learning_rates:
        candidate = MLP(activation_layer_combination).to(device)
        optimizer = torch.optim.Adam(candidate.parameters(), lr=learning_rate)
        train_model(candidate, optimizer)
        results = test_model(candidate)
        print("Model with activation layers", activation_layer_combination, "and learning rate", learning_rate, "had success of", results)

        # results[0] is the top-1 accuracy; ties keep the earlier candidate.
        if best_results is None or results[0] > best_results[0]:
            best_model = candidate
            best_results = results
            best_config = (activation_layer_combination, learning_rate)

# An empty grid leaves `model` untouched.
if best_model is not None:
    model = best_model
    print("Best configuration:", best_config, "with (top-1, top-3, top-5) accuracy", best_results)

Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0001 had success of (0.44952931636286714, 0.6824383880407573, 0.788915135916511)
Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0005 had success of (0.5080210132919649, 0.7299298381694461, 0.8241018227973064)
Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.001 had success of (0.5215950357860593, 0.7354299615696506, 0.826111483270458)
Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.005 had success of (0.5230758382399605, 0.7383563092761697, 0.8298840038077777)
Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learn

In [13]:
# Reuse same alphabet + mapping
alphabet = list("abcdefghijklmnopqrstuvwxyz")
char_to_idx = {ch: idx for idx, ch in enumerate(alphabet)}
PAD_IDX = len(alphabet)  # index 26 for OOV/padding
VOCAB_SIZE = len(alphabet) + 1  # 27 total (a–z + padding)
CONTEXT_SIZE = 10

idx_to_char = {idx: ch for ch, idx in char_to_idx.items()}
idx_to_char[PAD_IDX] = "_"  # for readability

def preprocess_input(context: str) -> torch.Tensor:
    context = context.lower()
    padded = context.rjust(CONTEXT_SIZE, "_")  # pad with underscores (or any 1-char symbol)

    indices = []
    for ch in padded[-CONTEXT_SIZE:]:
        idx = char_to_idx.get(ch, PAD_IDX)  # if '_' or unknown → PAD_IDX (26)
        indices.append(idx)

    return torch.tensor(indices, dtype=torch.long).unsqueeze(0).to(device)


def predict_next_chars(model, context: str, top_k=5):
    model.eval()
    input_tensor = preprocess_input(context)
    with torch.no_grad():
        logits = model(input_tensor)
        probs = torch.softmax(logits, dim=-1)
        top_probs, top_indices = probs.topk(top_k, dim=-1)

    predictions = [(idx_to_char[idx.item()], top_probs[0, i].item()) for i, idx in enumerate(top_indices[0])]
    return predictions


In [24]:
# Demo: the 20 most probable next characters for the prefix "susta", printed
# upper-cased with their probability as a percentage.
preds = predict_next_chars(model, "susta", top_k=20)
for char, prob in preds:
    print(f"{char.upper()}: {(prob * 100):.2f} %")


I: 89.74 %
N: 4.42 %
Y: 1.88 %
M: 1.51 %
B: 0.90 %
E: 0.65 %
G: 0.21 %
R: 0.16 %
L: 0.15 %
O: 0.13 %
C: 0.09 %
U: 0.08 %
A: 0.05 %
V: 0.02 %
S: 0.01 %
F: 0.00 %
H: 0.00 %
T: 0.00 %
W: 0.00 %
P: 0.00 %


Model saving

In [None]:
# Serialise the whole module object in one file.
# NOTE(review): per the PyTorch serialization notes this pickles a reference to
# the MLP class, so loading needs the same class definition to be available --
# the state_dict file written in the next cell is the more portable artefact.
torch.save(model, "mlp_full_model.pth")

In [38]:
# Save only the learned parameters. To restore them, build an MLP with the same
# activation layers and call load_state_dict(torch.load("mlp_weights.pth")).
torch.save(model.state_dict(), "mlp_weights.pth")