{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Next-letter prediction with an MLP\n",
    "\n",
    "Trains a small fully connected network to predict the current letter from a window of previous-letter features plus three auxiliary features (`is_start`, `prev_type`, `word_length`), then reports top-k accuracy on a held-out split."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import DataLoader, TensorDataset, random_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tunable settings\n",
    "DATA_PATH = \"./data.npy\"\n",
    "SEED = 0  # seeds the split, the shuffling order and the weight initialisation\n",
    "CONTEXT_SIZE = 10\n",
    "ALPHABET = list(\"abcdefghijklmnopqrstuvwxyz\")\n",
    "ALPHABET_SIZE = len(ALPHABET)\n",
    "TRAINING_DATA_SIZE = 0.9  # fraction of rows used for training\n",
    "BATCH_SIZE = 128\n",
    "HIDDEN_SIZES = (256, 128)\n",
    "LEARNING_RATE = 1e-3\n",
    "NUM_EPOCHS = 30\n",
    "TOP_KS = (1, 3, 5)  # k values reported by the evaluation\n",
    "\n",
    "\n",
    "# Derived values\n",
    "PREV_LETTER_FEATURES = CONTEXT_SIZE * ALPHABET_SIZE\n",
    "CURR_LETTER_FEATURES = ALPHABET_SIZE\n",
    "OTHER_FEATURES = 3  # is_start, prev_type, word_length\n",
    "\n",
    "TOTAL_FEATURES = PREV_LETTER_FEATURES + CURR_LETTER_FEATURES + OTHER_FEATURES\n",
    "\n",
    "INPUT_SIZE = PREV_LETTER_FEATURES + OTHER_FEATURES\n",
    "OUTPUT_SIZE = ALPHABET_SIZE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = np.load(DATA_PATH)\n",
    "\n",
    "# The slicing below silently yields too few columns on a narrower array,\n",
    "# so fail here with a readable message instead.\n",
    "assert data.ndim == 2 and data.shape[1] >= TOTAL_FEATURES, (\n",
    "    f\"expected a 2-D array with at least {TOTAL_FEATURES} columns, got shape {data.shape}\"\n",
    ")\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define and split data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define input and output columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column range holding the current-letter block\n",
    "curr_start = PREV_LETTER_FEATURES\n",
    "curr_end = PREV_LETTER_FEATURES + CURR_LETTER_FEATURES\n",
    "\n",
    "# Inputs: the previous-letter block followed by the auxiliary features,\n",
    "# skipping the current-letter block that sits between them.\n",
    "X = np.hstack([\n",
    "    data[:, :curr_start],\n",
    "    data[:, curr_end:TOTAL_FEATURES],\n",
    "])\n",
    "\n",
    "# Extract current letter (one-hot target)\n",
    "# NOTE(review): argmax maps an all-zero row to class 0 - confirm every row has a letter set.\n",
    "y_onehot = data[:, curr_start:curr_end]\n",
    "y = np.argmax(y_onehot, axis=1)\n",
    "\n",
    "# Torch dataset\n",
    "X_tensor = torch.tensor(X, dtype=torch.float32)\n",
    "y_tensor = torch.tensor(y, dtype=torch.long)\n",
    "\n",
    "dataset = TensorDataset(X_tensor, y_tensor)\n",
    "assert X_tensor.shape == (len(dataset), INPUT_SIZE), X_tensor.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train / test split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_len = int(TRAINING_DATA_SIZE * len(dataset))\n",
    "test_len = len(dataset) - train_len\n",
    "\n",
    "# A dedicated seeded generator keeps the split identical across runs.\n",
    "train_set, test_set = random_split(\n",
    "    dataset,\n",
    "    [train_len, test_len],\n",
    "    generator=torch.Generator().manual_seed(SEED),\n",
    ")\n",
    "len(train_set), len(test_set)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_loader = DataLoader(\n",
    "    train_set,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    shuffle=True,\n",
    "    generator=torch.Generator().manual_seed(SEED),  # repeatable shuffling order\n",
    ")\n",
    "test_loader = DataLoader(test_set, batch_size=BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train on data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MLP(nn.Module):\n",
    "    \"\"\"Fully connected classifier: Linear/ReLU hidden layers followed by a linear output layer.\n",
    "\n",
    "    Args:\n",
    "        input_size: number of input features per row.\n",
    "        hidden_sizes: width of each hidden layer, in order.\n",
    "        output_size: number of classes (one score per class).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, input_size=INPUT_SIZE, hidden_sizes=HIDDEN_SIZES, output_size=OUTPUT_SIZE):\n",
    "        super().__init__()\n",
    "        layers = []\n",
    "        in_features = input_size\n",
    "        for width in hidden_sizes:\n",
    "            layers += [nn.Linear(in_features, width), nn.ReLU()]\n",
    "            in_features = width\n",
    "        layers.append(nn.Linear(in_features, output_size))\n",
    "        self.net = nn.Sequential(*layers)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the unnormalised class scores (logits) for a batch of feature rows.\"\"\"\n",
    "        return self.net(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(f\"Using device: {device}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.manual_seed(SEED)  # repeatable weight initialisation\n",
    "model = MLP().to(device)\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)\n",
    "criterion = nn.CrossEntropyLoss()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_one_epoch(model, loader, optimizer, criterion):\n",
    "    \"\"\"Run one optimisation pass over `loader` and return the mean loss per sample.\n",
    "\n",
    "    Batches are moved to the notebook-level `device` before the forward pass.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `loader` yields no samples.\n",
    "    \"\"\"\n",
    "    model.train()\n",
    "    loss_sum = 0.0\n",
    "    n_samples = 0\n",
    "    for batch_X, batch_y in loader:\n",
    "        batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n",
    "        optimizer.zero_grad()\n",
    "        loss = criterion(model(batch_X), batch_y)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        # Assumes `criterion` uses mean reduction (the nn.CrossEntropyLoss default):\n",
    "        # re-weight by batch size so a smaller final batch does not skew the epoch mean.\n",
    "        loss_sum += loss.item() * batch_y.size(0)\n",
    "        n_samples += batch_y.size(0)\n",
    "    if n_samples == 0:\n",
    "        raise ValueError(\"loader produced no samples\")\n",
    "    return loss_sum / n_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for epoch in range(1, NUM_EPOCHS + 1):\n",
    "    epoch_loss = train_one_epoch(model, train_loader, optimizer, criterion)\n",
    "    print(f\"Epoch {epoch}, Loss: {epoch_loss:.4f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_topk(model, loader, ks=TOP_KS):\n",
    "    \"\"\"Return `{k: accuracy}`: the fraction of samples whose true class is among the k highest scores.\n",
    "\n",
    "    Batches are moved to the notebook-level `device` before the forward pass.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `loader` yields no samples.\n",
    "    \"\"\"\n",
    "    model.eval()\n",
    "    max_k = max(ks)\n",
    "    hits = {k: 0 for k in ks}\n",
    "    total = 0\n",
    "\n",
    "    with torch.no_grad():\n",
    "        for batch_X, batch_y in loader:\n",
    "            batch_X, batch_y = batch_X.to(device), batch_y.to(device)\n",
    "            top_preds = model(batch_X).topk(max_k, dim=1).indices  # shape: [batch_size, max_k]\n",
    "\n",
    "            # True where the label equals the prediction at that rank; at most one hit per row.\n",
    "            matches = top_preds == batch_y.unsqueeze(1)\n",
    "            for k in ks:\n",
    "                hits[k] += matches[:, :k].any(dim=1).sum().item()\n",
    "            total += batch_y.size(0)\n",
    "\n",
    "    if total == 0:\n",
    "        raise ValueError(\"loader produced no samples\")\n",
    "    return {k: hits[k] / total for k in ks}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "topk_acc = evaluate_topk(model, test_loader)\n",
    "\n",
    "for k, acc in topk_acc.items():\n",
    "    print(f\"Top-{k} Accuracy: {acc * 100:.2f}%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Previously recorded run\n",
    "\n",
    "The outputs stored with the earlier, unseeded version of this notebook (device: `cuda`) were:\n",
    "\n",
    "- Training loss, reported as the **sum** of per-batch losses: 4277.8506 after epoch 1, falling to 2859.8749 after epoch 30.\n",
    "- Top-1 accuracy: 51.27%\n",
    "- Top-3 accuracy: 73.68%\n",
    "- Top-5 accuracy: 82.94%\n",
    "\n",
    "The training cell now prints the **mean loss per sample**, so its values are not directly comparable with the summed figures above. Re-run the notebook to refresh the results."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}