{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Read the comma-separated file ./out.txt into a DataFrame.\n", "data = pd.read_csv(\"./out.txt\", sep=',')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Define and split data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define input and output columns" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Columns passed to the model as inputs (X).\n", "input_features = ['previous_5','previous_4','previous_3','previous_2','previous_1','is_start','previous_type','word_length']\n", "# Column the model is trained to predict (y).\n", "target_feature = 'current'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "test_size = 0.1 # @param {\"type\":\"number\",\"placeholder\":\"0.1\"}\n", "# Fixed seed so the shuffled train/test split is identical on every run.\n", "random_state = 42 # @param {\"type\":\"integer\"}\n", "\n", "# Hold out a `test_size` fraction of the rows for evaluation.\n", "X_train, X_test, y_train, y_test = train_test_split(\n", "    data[input_features],\n", "    data[target_feature],\n", "    test_size=test_size,\n", "    random_state=random_state,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Train on data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LogisticRegression(max_iter=10000, multi_class='multinomial', n_jobs=10,\n", " solver='saga')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=10000, multi_class='multinomial', n_jobs=10,\n", " solver='saga')