{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Omega\n", "Prediction of next key to be pressed using Multilayer Perceptron" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Import and load data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import all required modules" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from torch.utils.data import DataLoader, TensorDataset, random_split" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load data" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "data = np.load(\"./data.npy\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Define contstants describing the dataset and other useful information" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "CONTEXT_SIZE = 10\n", "ALPHABET = list(\"abcdefghijklmnopqrstuvwxyz\")\n", "ALPHABET_SIZE = len(ALPHABET)\n", "TRAINING_DATA_SIZE = 0.9\n", "\n", "VOCAB_SIZE = ALPHABET_SIZE + 1 # 26 letters + 1 for unknown\n", "EMBEDDING_DIM = 16\n", "\n", "INPUT_SEQ_LEN = CONTEXT_SIZE\n", "OUTPUT_SIZE = VOCAB_SIZE" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Define and split data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define input and output columns" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "X = data[:, :CONTEXT_SIZE] # shape: (num_samples, CONTEXT_SIZE)\n", "\n", "# Target: current letter index\n", "y = data[:, CONTEXT_SIZE] # shape: (num_samples,)\n", "\n", "# Torch dataset (important: use long/int64 for indices)\n", "X_tensor = torch.tensor(X, dtype=torch.long) # for nn.Embedding\n", "y_tensor = torch.tensor(y, dtype=torch.long) # for classification target\n", "\n", "dataset = TensorDataset(X_tensor, y_tensor)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "train_len = int(TRAINING_DATA_SIZE * len(dataset))\n", "train_set, test_set = random_split(dataset, [train_len, len(dataset) - train_len])" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "train_loader = DataLoader(train_set, batch_size=1024, shuffle=True)\n", "test_loader = DataLoader(test_set, batch_size=1024)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "learning_rates = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2, 5e-2]\n", "activation_layers = [nn.ReLU, nn.GELU]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Model and training" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To find the best model for MLP, combinations of hyperparams are defined. 
\n", "This includes **activation layers** and **learning rates**" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "from itertools import product\n", "all_activation_combinations = list(product(activation_layers, repeat=len(activation_layers)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "class MLP(nn.Module):\n", " def __init__(self, activation_layers: list):\n", " super().__init__()\n", " self.net = nn.Sequential(\n", " nn.Embedding(num_embeddings=VOCAB_SIZE, embedding_dim=EMBEDDING_DIM),\n", " nn.Flatten(),\n", " nn.Linear(CONTEXT_SIZE * EMBEDDING_DIM, 256),\n", " activation_layers[0](),\n", " nn.Linear(256, 128),\n", " activation_layers[1](),\n", " nn.Linear(128, OUTPUT_SIZE)\n", " )\n", "\n", " def forward(self, x):\n", " return self.net(x)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Using device: cuda\n" ] } ], "source": [ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "print(f\"Using device: {device}\")" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "# model = MLP().to(device)\n", "model = None" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Test all the activation_layer combinations" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "\n", "criterion = nn.CrossEntropyLoss()" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "def train_model(model, optimizer):\n", " for epoch in range(30):\n", " model.train()\n", " total_loss = 0\n", " for batch_X, batch_y in train_loader:\n", " batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n", " optimizer.zero_grad()\n", " output = model(batch_X)\n", " loss = criterion(output, batch_y)\n", " loss.backward()\n", " optimizer.step()\n", " total_loss += loss.item()\n", " # print(f\"Epoch {epoch+1}, Loss: {total_loss:.4f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Testing model" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "def test_model(model) -> tuple[float]:\n", " model.eval()\n", " correct_top1 = 0\n", " correct_top3 = 0\n", " correct_top5 = 0\n", " total = 0\n", "\n", " with torch.no_grad():\n", " for batch_X, batch_y in test_loader:\n", " batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n", " outputs = model(batch_X)\n", "\n", " _, top_preds = outputs.topk(5, dim=1)\n", "\n", " for true, top5 in zip(batch_y, top_preds):\n", " total += 1\n", " if true == top5[0]:\n", " correct_top1 += 1\n", " if true in top5[:3]:\n", " correct_top3 += 1\n", " if true in top5:\n", " correct_top5 += 1\n", "\n", " top1_acc = correct_top1 / total\n", " top3_acc = correct_top3 / total\n", " top5_acc = correct_top5 / total\n", "\n", " return (top1_acc, top3_acc, top5_acc)\n" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model with activation layers (, ) and learning rate 0.0001 had success of (0.44952931636286714, 0.6824383880407573, 0.788915135916511)\n", "Model with activation layers (, ) and learning rate 0.0005 had success of (0.5080210132919649, 0.7299298381694461, 0.8241018227973064)\n", "Model with activation layers (, ) and 
{ "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0001 had success of (0.44952931636286714, 0.6824383880407573, 0.788915135916511)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0005 had success of (0.5080210132919649, 0.7299298381694461, 0.8241018227973064)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.001 had success of (0.5215950357860593, 0.7354299615696506, 0.826111483270458)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.005 had success of (0.5230758382399605, 0.7383563092761697, 0.8298840038077777)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.01 had success of (0.5206783485526919, 0.7364171632055847, 0.8278390861333428)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.05 had success of (0.12682015301625357, 0.29884003807777737, 0.45160949123858546)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0001 had success of (0.44251313330747805, 0.6765504354264359, 0.7860240454112752)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0005 had success of (0.5103127313753835, 0.7293304657476289, 0.8237492507844727)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.001 had success of (0.5211366921693756, 0.7379332228607693, 0.8288968021718436)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.005 had success of (0.5246271550964284, 0.739942883333921, 0.8305538906321617)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.01 had success of (0.5214892641822092, 0.7391319677044036, 0.8297077178013609)\n", "Model with activation layers (<class 'torch.nn.modules.activation.ReLU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.05 had success of (0.1655325600253852, 0.3544759017029228, 0.495469449635088)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0001 had success of (0.44706131227303175, 0.6806755279765893, 0.7906427387793957)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.0005 had success of (0.5120050770369848, 0.7312343546169305, 0.8229735923562388)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.001 had success of (0.5179282868525896, 0.7381800232697528, 0.8289673165744104)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.005 had success of (0.5234636674540775, 0.7421640870147728, 0.8307654338398618)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.01 had success of (0.5197264041180412, 0.7384268236787364, 0.8286500017628601)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.ReLU'>) and learning rate 0.05 had success of (0.12551563656876918, 0.29757077883157634, 0.45034023199238443)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0001 had success of (0.4493530303564503, 0.683284560871558, 0.7907837675845292)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.0005 had success of (0.5151077107499207, 0.733808130310616, 0.8255121108486408)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.001 had success of (0.5195148609103409, 0.7389204244967035, 0.8294961745936608)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.005 had success of (0.5214892641822092, 0.7401896837429045, 0.8302365758206114)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.01 had success of (0.5198674329231746, 0.7398371117300708, 0.8258294256601911)\n", "Model with activation layers (<class 'torch.nn.modules.activation.GELU'>, <class 'torch.nn.modules.activation.GELU'>) and learning rate 0.05 had success of (0.3762648520960406, 0.6283538412720798, 0.7500617001022459)\n" ] } ], "source": [ "for activation_layer_combination in all_activation_combinations:\n", "    for learning_rate in learning_rates:\n", "        model = MLP(activation_layer_combination).to(device)\n", "        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", "        train_model(model, optimizer)\n", "        results = test_model(model)\n", "        print(\"Model with activation layers\", activation_layer_combination, \"and learning rate\", learning_rate, \"had success of\", results)" ] },
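{ "cell_type": "markdown", "metadata": {}, "source": [ "Reading off the sweep above, the best top-1 accuracy (about 52.5 %) came from the (ReLU, GELU) combination at a learning rate of 5e-3. The cell below is a sketch that rebuilds and retrains that configuration, so the prediction and saving cells that follow operate on the best model rather than on whichever model the sweep happened to finish with." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Rebuild and retrain the best configuration found by the sweep above\n", "model = MLP([nn.ReLU, nn.GELU]).to(device)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)\n", "train_model(model, optimizer)\n", "test_model(model)" ] },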
"metadata": {}, "outputs": [], "source": [ "# Reuse same alphabet + mapping\n", "alphabet = list(\"abcdefghijklmnopqrstuvwxyz\")\n", "char_to_idx = {ch: idx for idx, ch in enumerate(alphabet)}\n", "PAD_IDX = len(alphabet) # index 26 for OOV/padding\n", "VOCAB_SIZE = len(alphabet) + 1 # 27 total (a–z + padding)\n", "CONTEXT_SIZE = 10\n", "\n", "idx_to_char = {idx: ch for ch, idx in char_to_idx.items()}\n", "idx_to_char[PAD_IDX] = \"_\" # for readability\n", "\n", "def preprocess_input(context: str) -> torch.Tensor:\n", " context = context.lower()\n", " padded = context.rjust(CONTEXT_SIZE, \"_\") # pad with underscores (or any 1-char symbol)\n", "\n", " indices = []\n", " for ch in padded[-CONTEXT_SIZE:]:\n", " idx = char_to_idx.get(ch, PAD_IDX) # if '_' or unknown → PAD_IDX (26)\n", " indices.append(idx)\n", "\n", " return torch.tensor(indices, dtype=torch.long).unsqueeze(0).to(device)\n", "\n", "\n", "def predict_next_chars(model, context: str, top_k=5):\n", " model.eval()\n", " input_tensor = preprocess_input(context)\n", " with torch.no_grad():\n", " logits = model(input_tensor)\n", " probs = torch.softmax(logits, dim=-1)\n", " top_probs, top_indices = probs.topk(top_k, dim=-1)\n", "\n", " predictions = [(idx_to_char[idx.item()], top_probs[0, i].item()) for i, idx in enumerate(top_indices[0])]\n", " return predictions\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "I: 89.74 %\n", "N: 4.42 %\n", "Y: 1.88 %\n", "M: 1.51 %\n", "B: 0.90 %\n", "E: 0.65 %\n", "G: 0.21 %\n", "R: 0.16 %\n", "L: 0.15 %\n", "O: 0.13 %\n", "C: 0.09 %\n", "U: 0.08 %\n", "A: 0.05 %\n", "V: 0.02 %\n", "S: 0.01 %\n", "F: 0.00 %\n", "H: 0.00 %\n", "T: 0.00 %\n", "W: 0.00 %\n", "P: 0.00 %\n" ] } ], "source": [ "preds = predict_next_chars(model, \"susta\", top_k=20)\n", "for char, prob in preds:\n", " print(f\"{char.upper()}: {(prob * 100):.2f} %\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Model saving" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.save(model, \"mlp_full_model.pth\")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "torch.save(model.state_dict(), \"mlp_weights.pth\")" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 }