{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup\n",
    "\n",
    "All imports and tunable constants live here so the notebook can be re-run top to bottom from a fresh kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single seed shared by the train/test split and the SAGA solver,\n",
    "# so that results are repeatable across runs.\n",
    "RANDOM_STATE = 42"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"./out.txt\", sep=',')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define and split data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define input and output columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_features = ['previous_5','previous_4','previous_3','previous_2','previous_1','is_start','previous_type','word_length']\n",
    "target_feature = 'current'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split into train and test sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_size = 0.1 # @param {\"type\":\"number\",\"placeholder\":\"0.1\"}\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    data[input_features],\n",
    "    data[target_feature],\n",
    "    test_size=test_size,\n",
    "    random_state=RANDOM_STATE,  # without a seed every run evaluates on a different split\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train on data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = LogisticRegression(\n",
    "    multi_class=\"multinomial\",\n",
    "    solver=\"saga\",\n",
    "    max_iter=10_000,\n",
    "    n_jobs=10,\n",
    "    random_state=RANDOM_STATE,  # the saga solver shuffles the data, so it needs a seed too\n",
    ")\n",
    "model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict labels for the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_pred = model.predict(X_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing model\n",
    "\n",
    "Accuracy is the fraction of test rows whose predicted label equals the true `current` value.\n",
    "\n",
    "A previous, unseeded run of this notebook printed `Accuracy: 0.211`. Saved outputs were cleared when the seed was introduced; re-run all cells to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "acc = accuracy_score(y_test, y_pred)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Accuracy: {acc:.3f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}