{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Omega\n",
"Prediction of next key to be pressed using Multilayer Perceptron"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Import and load data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import all required modules"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"from torch.utils.data import DataLoader, TensorDataset, random_split"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"data = np.load(\"./data.npy\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define contstants describing the dataset and other useful information"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"CONTEXT_SIZE = 10\n",
"ALPHABET = list(\"abcdefghijklmnopqrstuvwxyz\")\n",
"ALPHABET_SIZE = len(ALPHABET)\n",
"TRAINING_DATA_SIZE = 0.9\n",
"\n",
"VOCAB_SIZE = ALPHABET_SIZE + 1 # 26 letters + 1 for unknown\n",
"EMBEDDING_DIM = 16\n",
"\n",
"INPUT_SEQ_LEN = CONTEXT_SIZE\n",
"OUTPUT_SIZE = VOCAB_SIZE"
]
},
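{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check on the loaded array: a minimal sketch assuming `data.npy` holds integer rows of shape `(num_samples, CONTEXT_SIZE + 1)`, where the first `CONTEXT_SIZE` columns are the context window and the last column is the target letter index."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check (assumption: one context window plus one target per row)\n",
"assert data.ndim == 2 and data.shape[1] == CONTEXT_SIZE + 1, data.shape\n",
"# Every index must fit the embedding table (0..VOCAB_SIZE-1)\n",
"assert data.min() >= 0 and data.max() < VOCAB_SIZE"
]
},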
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Define and split data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define input and output columns"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"X = data[:, :CONTEXT_SIZE] # shape: (num_samples, CONTEXT_SIZE)\n",
"\n",
"# Target: current letter index\n",
"y = data[:, CONTEXT_SIZE] # shape: (num_samples,)\n",
"\n",
"# Torch dataset (important: use long/int64 for indices)\n",
"X_tensor = torch.tensor(X, dtype=torch.long) # for nn.Embedding\n",
"y_tensor = torch.tensor(y, dtype=torch.long) # for classification target\n",
"\n",
"dataset = TensorDataset(X_tensor, y_tensor)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"train_len = int(TRAINING_DATA_SIZE * len(dataset))\n",
"train_set, test_set = random_split(dataset, [train_len, len(dataset) - train_len])"
]
},
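{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that `random_split` draws a fresh shuffle on every run. For a reproducible split, a seeded generator can be passed in; a sketch (the seed value 42 is an arbitrary choice):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional, reproducible variant of the split above:\n",
"# train_set, test_set = random_split(\n",
"#     dataset,\n",
"#     [train_len, len(dataset) - train_len],\n",
"#     generator=torch.Generator().manual_seed(42),\n",
"# )"
]
},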
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"train_loader = DataLoader(train_set, batch_size=1024, shuffle=True)\n",
"test_loader = DataLoader(test_set, batch_size=1024)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"learning_rates = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2]\n",
"activation_layers = [nn.ReLU, nn.GELU]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Model and training"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To find the best model for MLP, combinations of hyperparams are defined. \n",
"This includes **activation layers** and **learning rates**"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"from itertools import product\n",
"all_activation_combinations = list(product(activation_layers, repeat=len(activation_layers)))"
]
},
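{
"cell_type": "markdown",
"metadata": {},
"source": [
"With two candidate activations and two slots, `product` yields four ordered pairs: (ReLU, ReLU), (ReLU, GELU), (GELU, ReLU), (GELU, GELU). A quick check to make the sweep explicit:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Expect 4 combinations of the two activation classes\n",
"for combo in all_activation_combinations:\n",
"    print([layer.__name__ for layer in combo])"
]
},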
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"class MLP(nn.Module):\n",
" def __init__(self, activation_layers: list):\n",
" super().__init__()\n",
" self.net = nn.Sequential(\n",
" nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM),\n",
" nn.Flatten(),\n",
" nn.Linear(CONTEXT_SIZE * EMBEDDING_DIM, 256),\n",
" activation_layers[0](),\n",
" nn.Linear(256, 128),\n",
" activation_layers[1](),\n",
" nn.Linear(128, OUTPUT_SIZE)\n",
" )\n",
"\n",
" def forward(self, x):\n",
" return self.net(x)"
]
},
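{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick shape check on an untrained instance (a sketch, not part of the sweep): a batch of token indices of shape `(batch, CONTEXT_SIZE)` should come out as logits of shape `(batch, OUTPUT_SIZE)`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"_probe = MLP([nn.ReLU, nn.ReLU])\n",
"_dummy = torch.zeros(2, CONTEXT_SIZE, dtype=torch.long)  # batch of 2 contexts\n",
"assert _probe(_dummy).shape == (2, OUTPUT_SIZE)"
]
},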
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using device: cuda\n"
]
}
],
"source": [
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"print(f\"Using device: {device}\")"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# model = MLP().to(device)\n",
"model = None"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Test all the activation_layer combinations"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"\n",
"criterion = nn.CrossEntropyLoss()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"def train_model(model, optimizer):\n",
" for epoch in range(30):\n",
" model.train()\n",
" total_loss = 0\n",
" for batch_X, batch_y in train_loader:\n",
" batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n",
" optimizer.zero_grad()\n",
" output = model(batch_X)\n",
" loss = criterion(output, batch_y)\n",
" loss.backward()\n",
" optimizer.step()\n",
" total_loss += loss.item()\n",
" # print(f\"Epoch {epoch+1}, Loss: {total_loss:.4f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Testing model"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"def test_model(model) -> tuple[float]:\n",
" model.eval()\n",
" correct_top1 = 0\n",
" correct_top3 = 0\n",
" correct_top5 = 0\n",
" total = 0\n",
"\n",
" with torch.no_grad():\n",
" for batch_X, batch_y in test_loader:\n",
" batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n",
" outputs = model(batch_X)\n",
"\n",
" _, top_preds = outputs.topk(5, dim=1)\n",
"\n",
" for true, top5 in zip(batch_y, top_preds):\n",
" total += 1\n",
" if true == top5[0]:\n",
" correct_top1 += 1\n",
" if true in top5[:3]:\n",
" correct_top3 += 1\n",
" if true in top5:\n",
" correct_top5 += 1\n",
"\n",
" top1_acc = correct_top1 / total\n",
" top3_acc = correct_top3 / total\n",
" top5_acc = correct_top5 / total\n",
"\n",
" return (top1_acc, top3_acc, top5_acc)\n"
]
},
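{
"cell_type": "markdown",
"metadata": {},
"source": [
"The per-sample Python loop above is easy to read but slow on large test sets. An equivalent vectorized sketch of the same top-1/3/5 accuracies, using only tensor operations:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def test_model_vectorized(model) -> tuple[float, float, float]:\n",
"    model.eval()\n",
"    hits = torch.zeros(3)  # top-1, top-3, top-5 hit counts\n",
"    total = 0\n",
"    with torch.no_grad():\n",
"        for batch_X, batch_y in test_loader:\n",
"            batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n",
"            _, top5 = model(batch_X).topk(5, dim=1)\n",
"            match = top5.eq(batch_y.unsqueeze(1))  # (batch, 5) boolean hits\n",
"            for i, k in enumerate((1, 3, 5)):\n",
"                hits[i] += match[:, :k].any(dim=1).sum().item()\n",
"            total += batch_y.size(0)\n",
"    return tuple((hits / total).tolist())"
]
},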
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0001 had success of (0.44952931636286714, 0.6824383880407573, 0.788915135916511)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0005 had success of (0.5080210132919649, 0.7299298381694461, 0.8241018227973064)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.001 had success of (0.5215950357860593, 0.7354299615696506, 0.826111483270458)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.005 had success of (0.5230758382399605, 0.7383563092761697, 0.8298840038077777)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.01 had success of (0.5206783485526919, 0.7364171632055847, 0.8278390861333428)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.05 had success of (0.12682015301625357, 0.29884003807777737, 0.45160949123858546)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0001 had success of (0.44251313330747805, 0.6765504354264359, 0.7860240454112752)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0005 had success of (0.5103127313753835, 0.7293304657476289, 0.8237492507844727)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.001 had success of (0.5211366921693756, 0.7379332228607693, 0.8288968021718436)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.005 had success of (0.5246271550964284, 0.739942883333921, 0.8305538906321617)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.01 had success of (0.5214892641822092, 0.7391319677044036, 0.8297077178013609)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.05 had success of (0.1655325600253852, 0.3544759017029228, 0.495469449635088)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0001 had success of (0.44706131227303175, 0.6806755279765893, 0.7906427387793957)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0005 had success of (0.5120050770369848, 0.7312343546169305, 0.8229735923562388)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.001 had success of (0.5179282868525896, 0.7381800232697528, 0.8289673165744104)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.005 had success of (0.5234636674540775, 0.7421640870147728, 0.8307654338398618)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.01 had success of (0.5197264041180412, 0.7384268236787364, 0.8286500017628601)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.05 had success of (0.12551563656876918, 0.29757077883157634, 0.45034023199238443)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0001 had success of (0.4493530303564503, 0.683284560871558, 0.7907837675845292)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0005 had success of (0.5151077107499207, 0.733808130310616, 0.8255121108486408)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.001 had success of (0.5195148609103409, 0.7389204244967035, 0.8294961745936608)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.005 had success of (0.5214892641822092, 0.7401896837429045, 0.8302365758206114)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.01 had success of (0.5198674329231746, 0.7398371117300708, 0.8258294256601911)\n",
"Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.05 had success of (0.3762648520960406, 0.6283538412720798, 0.7500617001022459)\n"
]
}
],
"source": [
"for activation_layer_combination in all_activation_combinations:\n",
" for learning_rate in learning_rates:\n",
" model = MLP(activation_layer_combination).to(device)\n",
" optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n",
" train_model(model, optimizer)\n",
" results = test_model(model)\n",
" print(\"Model with activation layers\", activation_layer_combination, \"and learning rate\", learning_rate, \"had success of\", results)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Reuse same alphabet + mapping\n",
"alphabet = list(\"abcdefghijklmnopqrstuvwxyz\")\n",
"char_to_idx = {ch: idx for idx, ch in enumerate(alphabet)}\n",
"PAD_IDX = len(alphabet) # index 26 for OOV/padding\n",
"VOCAB_SIZE = len(alphabet) + 1 # 27 total (az + padding)\n",
"CONTEXT_SIZE = 10\n",
"\n",
"idx_to_char = {idx: ch for ch, idx in char_to_idx.items()}\n",
"idx_to_char[PAD_IDX] = \"_\" # for readability\n",
"\n",
"def preprocess_input(context: str) -> torch.Tensor:\n",
" context = context.lower()\n",
" padded = context.rjust(CONTEXT_SIZE, \"_\") # pad with underscores (or any 1-char symbol)\n",
"\n",
" indices = []\n",
" for ch in padded[-CONTEXT_SIZE:]:\n",
" idx = char_to_idx.get(ch, PAD_IDX) # if '_' or unknown → PAD_IDX (26)\n",
" indices.append(idx)\n",
"\n",
" return torch.tensor(indices, dtype=torch.long).unsqueeze(0).to(device)\n",
"\n",
"\n",
"def predict_next_chars(model, context: str, top_k=5):\n",
" model.eval()\n",
" input_tensor = preprocess_input(context)\n",
" with torch.no_grad():\n",
" logits = model(input_tensor)\n",
" probs = torch.softmax(logits, dim=-1)\n",
" top_probs, top_indices = probs.topk(top_k, dim=-1)\n",
"\n",
" predictions = [(idx_to_char[idx.item()], top_probs[0, i].item()) for i, idx in enumerate(top_indices[0])]\n",
" return predictions\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"I: 89.74 %\n",
"N: 4.42 %\n",
"Y: 1.88 %\n",
"M: 1.51 %\n",
"B: 0.90 %\n",
"E: 0.65 %\n",
"G: 0.21 %\n",
"R: 0.16 %\n",
"L: 0.15 %\n",
"O: 0.13 %\n",
"C: 0.09 %\n",
"U: 0.08 %\n",
"A: 0.05 %\n",
"V: 0.02 %\n",
"S: 0.01 %\n",
"F: 0.00 %\n",
"H: 0.00 %\n",
"T: 0.00 %\n",
"W: 0.00 %\n",
"P: 0.00 %\n"
]
}
],
"source": [
"preds = predict_next_chars(model, \"susta\", top_k=20)\n",
"for char, prob in preds:\n",
" print(f\"{char.upper()}: {(prob * 100):.2f} %\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Model saving"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"torch.save(model, \"mlp_full_model.pth\")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"torch.save(model.state_dict(), \"mlp_weights.pth\")"
]
}
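,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Restoring from the weights file requires re-instantiating the architecture first. A sketch (the activation pair is an assumption; use whichever combination won the sweep):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assumed activation pair; match it to the best sweep result\n",
"restored = MLP([nn.GELU, nn.ReLU]).to(device)\n",
"restored.load_state_dict(torch.load(\"mlp_weights.pth\", map_location=device))\n",
"restored.eval()"
]
}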
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}