{ "cells": [ { "cell_type": "markdown", "id": "b65dfbdc-bb80-4bd4-8248-dd06483222cd", "metadata": {}, "source": [ "# K-fold Validation PorkCNN\n", "\n", "author: davidycliao(David Yen-Chieh Liao) \n", "email: davidycliao@gmail.com \n", "date: 9-July-2021 " ] }, { "cell_type": "markdown", "id": "e8005d5a-f938-4c26-86d5-592408152d08", "metadata": {}, "source": [ "-------------------------\n", "\n", "### Stage 1: Libaries & Dependencies" ] }, { "cell_type": "code", "execution_count": 1, "id": "6ca92ba3-7480-418f-91e0-d2beea2d10af", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T22:59:53.467113Z", "iopub.status.busy": "2021-07-08T22:59:53.466883Z", "iopub.status.idle": "2021-07-08T22:59:59.014730Z", "shell.execute_reply": "2021-07-08T22:59:59.014249Z", "shell.execute_reply.started": "2021-07-08T22:59:53.467088Z" }, "tags": [] }, "outputs": [], "source": [ "# built-in library\n", "import math\n", "import re\n", "import collections\n", "import zipfile\n", "import random\n", "from itertools import chain\n", "\n", "# ML & Deep Learning/ NLP toolkit\n", "import pandas as pd\n", "import numpy as np\n", "import jieba\n", "from sklearn.model_selection import train_test_split\n", "import tensorflow as tf\n", "from tensorflow.keras import layers\n", "import tensorflow_datasets as tfds\n", "from tensorflow.keras.callbacks import EarlyStopping,TensorBoard\n", "\n", "# Visualization\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "id": "8bd77e05-355a-422f-87db-2cf77cba3bdd", "metadata": {}, "source": [ "-------------------------\n", "\n", "### Stage 2: Data Preprocessing (Training Data: Introduction of Bills and Legislation from 6th Session to 7th Session, 2004-2012)" ] }, { "cell_type": "markdown", "id": "3b57b9c9-443e-4e26-bda2-15f6877845d5", "metadata": {}, "source": [ "#### (1) Read file " ] }, { "cell_type": "code", "execution_count": 2, "id": "e1973cfb-73d4-4717-a67a-a4e8a0c9edb0", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:21.285359Z", "iopub.status.busy": "2021-07-08T23:00:21.285192Z", "iopub.status.idle": "2021-07-08T23:00:21.327935Z", "shell.execute_reply": "2021-07-08T23:00:21.327224Z", "shell.execute_reply.started": "2021-07-08T23:00:21.285341Z" }, "tags": [] }, "outputs": [], "source": [ "# read file\n", "df = pd.read_csv('data/Pork Bill - 2021-05-20.csv',encoding='utf-8')\n", "\n", "# combine abstract of bill and title \n", "df['text'] = df['Title'] + df['Content'].fillna(df['Title'])\n", "\n", "# drop conten without having any characters\n", "# view na's row: df[df['text'].isnull()==True]\n", "data = df[['text', 'pork_bill']].dropna(subset=['text'])" ] }, { "cell_type": "code", "execution_count": 3, "id": "f6160785-9522-4404-8f77-6df941869cde", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:21.620052Z", "iopub.status.busy": "2021-07-08T23:00:21.619859Z", "iopub.status.idle": "2021-07-08T23:00:21.624658Z", "shell.execute_reply": "2021-07-08T23:00:21.624057Z", "shell.execute_reply.started": "2021-07-08T23:00:21.620031Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Pork Legislation 2510 \n", " None-Pork Legislation 4733\n" ] } ], "source": [ "print(\" Pork Legislation\", data['pork_bill'].value_counts()[1],'\\n', \n", " \"None-Pork Legislation\", data['pork_bill'].value_counts()[0])" ] }, { "cell_type": "markdown", "id": "5e6d677d-e200-40e6-9252-130eb02fc682", "metadata": { "execution": { "iopub.execute_input": 
"2021-05-16T17:01:03.001989Z", "iopub.status.busy": "2021-05-16T17:01:03.001801Z", "iopub.status.idle": "2021-05-16T17:01:03.004600Z", "shell.execute_reply": "2021-05-16T17:01:03.003789Z", "shell.execute_reply.started": "2021-05-16T17:01:03.001969Z" } }, "source": [ "#### (2) Tokenization" ] }, { "cell_type": "code", "execution_count": 4, "id": "120e8b54-7f75-4ca6-a0d0-9872b1522e8e", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:21.966144Z", "iopub.status.busy": "2021-07-08T23:00:21.965937Z", "iopub.status.idle": "2021-07-08T23:00:21.972829Z", "shell.execute_reply": "2021-07-08T23:00:21.972242Z", "shell.execute_reply.started": "2021-07-08T23:00:21.966122Z" }, "tags": [] }, "outputs": [], "source": [ "import collections\n", "import numpy as np\n", "import jieba\n", "from itertools import chain\n", "\n", "\n", "def jieba_cut(filename):\n", " \"\"\"\n", " cut Chinese and remove stop words\n", " Reference: https://www.cnblogs.com/Luv-GEM/p/10836454.html\n", " Stopwords: https://www.kaggle.com/rikdifos/english-and-chinese-stopwords?select=cn_stopwords.txt\n", " \"\"\"\n", " stop_list = [i.strip() for i in open('cn_stopwords.txt','r',encoding='utf-8')] \n", " news_cut = []\n", " news_list = []\n", " for line in filename: \n", " if line:\n", " news_cut = list(jieba.cut(''.join(line),cut_all=False,HMM=True)) \n", " news_list.append([word.strip() for word in news_cut if word not in stop_list and len(word.strip())>0]) \n", " news_list = list(chain.from_iterable(news_list)) \n", " return news_list\n", "\n", "def clearPucts(context):\n", " \"\"\"\n", " remove punctuation\n", " ref: https://chenyuzuoo.github.io/posts/28001/\n", " \"\"\"\n", " context = re.sub(\"[\\s+\\.\\!\\/_,$%^*(+\\\"\\']+|[+——!,。?、~@#¥%……&*()]+\", \"\", context)\n", " context = re.sub(\"[【】╮╯▽╰╭★→「」]+\",\"\", context)\n", " context = re.sub(\"!,❤。~《》:()【】「」?”“;:、\",\"\",context)\n", " context = re.sub(\"\\s\",\"\",context)\n", " return context\n", "\n", "def seg_char(sent):\n", " \"\"\"\n", " cut Chinese and remove stop words\n", " ref: https://blog.csdn.net/renyuanfang/article/details/86487367\n", " \"\"\"\n", " # split\n", " pattern_char_1 = re.compile(r'([\\W])')\n", " parts = pattern_char_1.split(sent)\n", " parts = [p for p in parts if len(p.strip())>0]\n", " # cut sentence\n", " pattern = re.compile(r'([\\u4e00-\\u9fa5])')\n", " chars = pattern.split(sent)\n", " chars = [w for w in chars if len(w.strip())>0]\n", " chars = ' '.join(chars)\n", " return chars" ] }, { "cell_type": "code", "execution_count": 5, "id": "25f4277c-001d-47a4-81d0-f5ac5ec5e5d3", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:22.140770Z", "iopub.status.busy": "2021-07-08T23:00:22.140512Z", "iopub.status.idle": "2021-07-08T23:00:24.043746Z", "shell.execute_reply": "2021-07-08T23:00:24.043162Z", "shell.execute_reply.started": "2021-07-08T23:00:22.140740Z" }, "tags": [] }, "outputs": [], "source": [ "data_clean = [seg_char(text) for text in [clearPucts(text) for text in data.text]]\n", "\n", "tokenizer = tfds.deprecated.text.SubwordTextEncoder.build_from_corpus(\n", " data_clean, target_vocab_size=2**18)\n", "\n", "data_inputs = [tokenizer.encode(sentence) for sentence in data_clean]" ] }, { "cell_type": "markdown", "id": "089e65af-e49c-4d60-962e-a0a826809aa5", "metadata": { "execution": { "iopub.execute_input": "2021-05-16T17:01:47.611294Z", "iopub.status.busy": "2021-05-16T17:01:47.611020Z", "iopub.status.idle": "2021-05-16T17:01:47.614208Z", "shell.execute_reply": "2021-05-16T17:01:47.613487Z", 
"shell.execute_reply.started": "2021-05-16T17:01:47.611276Z" } }, "source": [ "#### (3) Padding" ] }, { "cell_type": "code", "execution_count": 6, "id": "b801d231-972c-47e3-83c4-7375c2e11a18", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:24.075840Z", "iopub.status.busy": "2021-07-08T23:00:24.075642Z", "iopub.status.idle": "2021-07-08T23:00:24.141494Z", "shell.execute_reply": "2021-07-08T23:00:24.140925Z", "shell.execute_reply.started": "2021-07-08T23:00:24.075816Z" }, "tags": [] }, "outputs": [], "source": [ "MAX_LEN = max([len(sentence) for sentence in data_clean])\n", "data_inputs = tf.keras.preprocessing.sequence.pad_sequences(data_inputs,\n", " value=0,\n", " padding=\"post\",\n", " maxlen=MAX_LEN)\n", "\n", "data_labels = data.pork_bill.values\n", "#print('Maximun length:{} \\nInput:{}'.format(MAX_LEN, data_inputs.shape[0]))\n" ] }, { "cell_type": "markdown", "id": "0346b3fa-dd60-4662-9af2-83e932d09d3c", "metadata": {}, "source": [ "#### (4) Spliting Training / Testing Set" ] }, { "cell_type": "code", "execution_count": 7, "id": "383bf84d-b61d-45e3-89bd-64c908efbf6c", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:24.142886Z", "iopub.status.busy": "2021-07-08T23:00:24.142722Z", "iopub.status.idle": "2021-07-08T23:00:24.156188Z", "shell.execute_reply": "2021-07-08T23:00:24.155525Z", "shell.execute_reply.started": "2021-07-08T23:00:24.142865Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shape of X Train: (4852, 785) \n", "Shape of X Test : (2391, 785) \n", "Shape of Y Trian: (4852,) \n", "Shape of Y Test : (2391,)\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "train_data, test_data, train_targets, test_targets = train_test_split(\n", " data_inputs, data_labels, test_size=0.33, random_state=42)\n", "\n", "print(\"Shape of X Train:\", train_data.shape, '\\n'\n", " \"Shape of X Test :\", test_data.shape,'\\n'\n", " \"Shape of Y Trian:\", train_targets.shape , '\\n'\n", " \"Shape of Y Test :\", test_targets.shape )" ] }, { "cell_type": "markdown", "id": "725f7feb-b93b-4d0b-bcbf-562240a79f8b", "metadata": { "execution": { "iopub.execute_input": "2021-05-16T17:04:13.375644Z", "iopub.status.busy": "2021-05-16T17:04:13.375440Z", "iopub.status.idle": "2021-05-16T17:04:13.378111Z", "shell.execute_reply": "2021-05-16T17:04:13.377464Z", "shell.execute_reply.started": "2021-05-16T17:04:13.375623Z" }, "tags": [] }, "source": [ "-------------------------\n", "\n", "\n", "### Stage 3: Model and Building" ] }, { "cell_type": "markdown", "id": "4a0e74bf-eaf4-4043-8d23-88291d8ea344", "metadata": { "execution": { "iopub.execute_input": "2021-05-16T17:13:18.807725Z", "iopub.status.busy": "2021-05-16T17:13:18.807498Z", "iopub.status.idle": "2021-05-16T17:13:18.810574Z", "shell.execute_reply": "2021-05-16T17:13:18.809625Z", "shell.execute_reply.started": "2021-05-16T17:13:18.807701Z" } }, "source": [ "#### (1) Using the Subclassing API to Build Dynamic Model" ] }, { "cell_type": "code", "execution_count": 8, "id": "1f18f9b1-5f4d-4d0a-baf4-2eebed7f34f2", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:25.614474Z", "iopub.status.busy": "2021-07-08T23:00:25.614219Z", "iopub.status.idle": "2021-07-08T23:00:25.624817Z", "shell.execute_reply": "2021-07-08T23:00:25.624119Z", "shell.execute_reply.started": "2021-07-08T23:00:25.614446Z" }, "tags": [] }, "outputs": [], "source": [ "class DCNN(tf.keras.Model):\n", " def __init__(self,\n", " 
vocab_size,\n", " emb_dim=128,\n", " nb_filters=100,\n", " # units: Positive integer, dimensionality of the output space.\n", " FFN_units=512,\n", " nb_classes=2,\n", " dropout_rate=0.1,\n", " training=False,\n", " name=\"PorkCNN\"):\n", " super(DCNN, self).__init__(name=name)\n", " self.embedding = layers.Embedding(vocab_size, emb_dim)\n", " self.bigram = layers.Conv1D(filters=nb_filters, kernel_size=2, strides = 1, padding=\"valid\", activation=\"relu\")\n", " self.bigram2 = layers.Conv1D(filters=nb_filters, kernel_size=2, strides = 2, padding=\"valid\", activation=\"relu\")\n", " self.trigram = layers.Conv1D(filters=nb_filters,kernel_size=3,strides = 1, padding=\"valid\",activation=\"relu\")\n", " self.trigram2 = layers.Conv1D(filters=nb_filters,kernel_size=3,strides = 2, padding=\"valid\",activation=\"relu\")\n", " self.fourgram = layers.Conv1D(filters=nb_filters,kernel_size=4,strides = 2, padding=\"valid\",activation=\"relu\")\n", " self.fivegram = layers.Conv1D(filters=nb_filters,kernel_size=5,strides = 2, padding=\"valid\",activation=\"relu\")\n", " self.pool = layers.GlobalMaxPool1D()\n", " self.dense_1 = layers.Dense(units=FFN_units, activation=\"relu\")\n", " self.dropout = layers.Dropout(rate=dropout_rate)\n", " self.last_dense = layers.Dense(units=1, activation=\"sigmoid\") \n", " def call(self, inputs, training):\n", " x = self.embedding(inputs)\n", " x_1 = self.bigram(x)\n", " x_1 = self.pool(x_1)\n", " x_1_1 = self.bigram2(x)\n", " x_1_1 = self.pool(x_1_1) \n", " x_2 = self.trigram(x)\n", " x_2 = self.pool(x_2)\n", " x_2_1 = self.trigram2(x)\n", " x_2_1 = self.pool(x_2_1) \n", " x_3 = self.fourgram(x)\n", " x_3 = self.pool(x_3)\n", " x_4 = self.fourgram(x)\n", " x_4 = self.pool(x_4) \n", " x_5 = self.fivegram(x)\n", " x_5 = self.pool(x_5) \n", " merged = tf.concat([x_1,x_1_1, x_2,x_2_1, x_3, x_4, x_5], axis=-1) \n", " merged = self.dense_1(merged)\n", " merged = self.dropout(merged, training)\n", " output = self.last_dense(merged) \n", " return output" ] }, { "cell_type": "code", "execution_count": 9, "id": "bfc02e93-3a0e-4c1c-b488-b2db98b53824", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:26.106352Z", "iopub.status.busy": "2021-07-08T23:00:26.106099Z", "iopub.status.idle": "2021-07-08T23:00:26.128713Z", "shell.execute_reply": "2021-07-08T23:00:26.128071Z", "shell.execute_reply.started": "2021-07-08T23:00:26.106324Z" }, "tags": [] }, "outputs": [], "source": [ "VOCAB_SIZE = tokenizer.vocab_size #tokenizer.vocab_size # 5000 tokenizer.vocab_size\n", "EMB_DIM = 200\n", "NB_FILTERS = 100\n", "FFN_UNITS = 256\n", "NB_CLASSES = 2 #len(set(train_labels))\n", "DROPOUT_RATE = 0.25\n", "BATCH_SIZE = 230\n", "NB_EPOCHS = 80\n", "\n", "Dcnn = DCNN(vocab_size=VOCAB_SIZE,\n", " emb_dim=EMB_DIM,\n", " nb_filters=NB_FILTERS,\n", " FFN_units=FFN_UNITS,\n", " nb_classes=NB_CLASSES,\n", " dropout_rate=DROPOUT_RATE)" ] }, { "cell_type": "markdown", "id": "b0ec8f51-6ed8-49fa-9d5e-c6929184bdf1", "metadata": { "execution": { "iopub.execute_input": "2021-05-16T18:43:59.279424Z", "iopub.status.busy": "2021-05-16T18:43:59.279202Z", "iopub.status.idle": "2021-05-16T18:43:59.282718Z", "shell.execute_reply": "2021-05-16T18:43:59.281823Z", "shell.execute_reply.started": "2021-05-16T18:43:59.279399Z" } }, "source": [ "#### (2) Compile and Summary of the Model" ] }, { "cell_type": "code", "execution_count": 10, "id": "2cce0414-05eb-4f79-9c27-bca9c91a2903", "metadata": { "execution": { "iopub.execute_input": "2021-07-08T23:00:28.471734Z", "iopub.status.busy": 
"2021-07-08T23:00:28.471481Z", "iopub.status.idle": "2021-07-08T23:00:28.552868Z", "shell.execute_reply": "2021-07-08T23:00:28.552157Z", "shell.execute_reply.started": "2021-07-08T23:00:28.471706Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"PorkCNN\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "embedding (Embedding) multiple 586600 \n", "_________________________________________________________________\n", "conv1d (Conv1D) multiple 40100 \n", "_________________________________________________________________\n", "conv1d_1 (Conv1D) multiple 40100 \n", "_________________________________________________________________\n", "conv1d_2 (Conv1D) multiple 60100 \n", "_________________________________________________________________\n", "conv1d_3 (Conv1D) multiple 60100 \n", "_________________________________________________________________\n", "conv1d_4 (Conv1D) multiple 80100 \n", "_________________________________________________________________\n", "conv1d_5 (Conv1D) multiple 100100 \n", "_________________________________________________________________\n", "global_max_pooling1d (Global multiple 0 \n", "_________________________________________________________________\n", "dense (Dense) multiple 179456 \n", "_________________________________________________________________\n", "dropout (Dropout) multiple 0 \n", "_________________________________________________________________\n", "dense_1 (Dense) multiple 257 \n", "=================================================================\n", "Total params: 1,146,913\n", "Trainable params: 1,146,913\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "Dcnn.compile(loss=\"binary_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"])\n", "Dcnn.build(input_shape = (train_data.shape[1], EMB_DIM)) # (train_inputs.shape[1] , EMB_DIM) (785 , EMB_DIM)\n", "Dcnn.summary()" ] }, { "cell_type": "markdown", "id": "4b08770a-d76c-4573-bd24-9984070cc5ba", "metadata": {}, "source": [ "-------------------------\n", "\n", "### Stage 4: K-fold Validation " ] }, { "cell_type": "markdown", "id": "e4123381-f66a-4170-91c6-c2911c0daec1", "metadata": {}, "source": [ "#### (1) Loss & Accuracy\n", "\n", "Code Reference: François Chollet, Deep Learning with Python, 4.3.4, 2020" ] }, { "cell_type": "code", "execution_count": null, "id": "9f47c90c-8952-4824-adf1-a65d3550c2cd", "metadata": { "tags": [] }, "outputs": [], "source": [ "k=5\n", "num_val_samples = len(train_data) // k \n", "num_epochs = 8\n", "batch_size = 230\n", "all_loss = []\n", "all_accuracy = []\n", "for i in range(k):\n", " print('processing fold #%d' % i)\n", " val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n", " val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n", " partial_train_data = np.concatenate(\n", " [train_data[:i * num_val_samples],\n", " train_data[(i + 1) * num_val_samples:]],\n", " axis=0)\n", " partial_train_targets = np.concatenate(\n", " [train_targets[:i * num_val_samples],\n", " train_targets[(i + 1) * num_val_samples:]],\n", " axis=0)\n", " Dcnn.fit(partial_train_data, partial_train_targets,\n", " epochs=num_epochs,\n", " validation_data=(val_data, val_targets),\n", " batch_size=batch_size,\n", " callbacks=[early_stop], \n", " verbose=1)\n", " loss, accuracy = 
{ "cell_type": "markdown", "id": "384f9370-c9e5-4d80-8014-66581fafac82", "metadata": {}, "source": [ "### Stage 5: Storing the Validation Logs " ] },
{ "cell_type": "code", "execution_count": null, "id": "b97cefc2-458e-4827-8df7-b49c7a74596f", "metadata": { "tags": [] }, "outputs": [], "source": [ "k = 5\n", "num_val_samples = len(train_data) // k\n", "num_epochs = 10\n", "batch_size = 230\n", "early_stop = EarlyStopping(monitor='val_loss', patience=1, verbose=1)\n", "loss = []\n", "accuracy = []\n", "val_loss = []\n", "val_accuracy = []\n", "for i in range(k):\n", "    print('processing fold #%d' % i)\n", "    val_data = train_data[i * num_val_samples: (i + 1) * num_val_samples]\n", "    val_targets = train_targets[i * num_val_samples: (i + 1) * num_val_samples]\n", "    partial_train_data = np.concatenate(\n", "        [train_data[:i * num_val_samples],\n", "         train_data[(i + 1) * num_val_samples:]],\n", "        axis=0)\n", "    partial_train_targets = np.concatenate(\n", "        [train_targets[:i * num_val_samples],\n", "         train_targets[(i + 1) * num_val_samples:]],\n", "        axis=0)\n", "    # a fresh model per fold, as in Stage 4\n", "    fold_model = DCNN(vocab_size=VOCAB_SIZE, emb_dim=EMB_DIM, nb_filters=NB_FILTERS,\n", "                      FFN_units=FFN_UNITS, nb_classes=NB_CLASSES, dropout_rate=DROPOUT_RATE)\n", "    fold_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n", "    history = fold_model.fit(partial_train_data,\n", "                             partial_train_targets,\n", "                             epochs=num_epochs,\n", "                             validation_data=(val_data, val_targets),\n", "                             batch_size=batch_size,\n", "                             #callbacks=[early_stop],\n", "                             verbose=0)\n", "    # store the per-epoch history of every fold\n", "    loss.append(history.history['loss'])\n", "    accuracy.append(history.history['accuracy'])\n", "    val_loss.append(history.history['val_loss'])\n", "    val_accuracy.append(history.history['val_accuracy'])" ] },
{ "cell_type": "code", "execution_count": null, "id": "09412aca-7cf7-4111-834f-cb41e184eab0", "metadata": { "tags": [] }, "outputs": [], "source": [ "# build the history of successive mean K-fold validation scores\n", "average_val_accuracy_history = [\n", "    np.mean([fold[i] for fold in val_accuracy]) for i in range(num_epochs)]" ] },
{ "cell_type": "markdown", "id": "ef89ca79-2053-490a-9274-077288cad96e", "metadata": {}, "source": [ "### Stage 6: Training the Final Model" ] },
{ "cell_type": "code", "execution_count": null, "id": "03598dbd-0ff3-494b-a6ca-415857c2d735", "metadata": { "tags": [] }, "outputs": [], "source": [ "# train the final model on the full training set\n", "Dcnn.fit(train_data, train_targets,\n", "         batch_size=BATCH_SIZE,\n", "         epochs=7)" ] },
{ "cell_type": "code", "execution_count": null, "id": "23ed354d-956b-49e1-9f3a-71fa6bb59ea5", "metadata": {}, "outputs": [], "source": [ "evaluation_model = Dcnn.evaluate(test_data, test_targets, batch_size=BATCH_SIZE)\n", "print(evaluation_model)" ] },
{ "cell_type": "code", "execution_count": null, "id": "6cfedebb-ab12-486d-a636-6055129fe7d9", "metadata": { "tags": [] }, "outputs": [], "source": [ "# quick look at the split arrays\n", "train_data, test_data, train_targets, test_targets" ] },
{ "cell_type": "code", "execution_count": null, "id": "1110ede8-f9ef-4d92-b446-de6b5a48af81", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sklearn.metrics import classification_report, confusion_matrix\n", "\n", "# binarise the sigmoid outputs with a 0.8 threshold\n", "predictions = Dcnn.predict(test_data)\n", "predictions = np.where(predictions > 0.8, 1, 0)\n", "print(classification_report(test_targets, predictions))" ] },
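{ "cell_type": "markdown", "id": "e9f0a1b2-3c4d-4e5f-8a6b-7c8d9e0f1a2b", "metadata": {}, "source": [ "Optionally, the confusion matrix can also be plotted as a heatmap with seaborn (imported in Stage 1 but otherwise unused). A minimal sketch; the labelled table below shows the same numbers." ] },
{ "cell_type": "code", "execution_count": null, "id": "f0a1b2c3-4d5e-4f6a-9b7c-8d9e0f1a2b3c", "metadata": { "tags": [] }, "outputs": [], "source": [ "# illustrative heatmap of the confusion matrix (same numbers as the table below)\n", "cm = confusion_matrix(test_targets, predictions)\n", "fig, ax = plt.subplots(figsize=(4, 3))\n", "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',\n", "            xticklabels=['Not Pork (0)', 'Pork (1)'],\n", "            yticklabels=['Not Pork (0)', 'Pork (1)'], ax=ax)\n", "ax.set_xlabel('Predicted')\n", "ax.set_ylabel('Actual')\n", "plt.tight_layout()\n", "plt.show()" ] },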
[], "source": [ "t = pd.DataFrame(confusion_matrix(test_targets,predictions), \n", " columns=['Predictions: Not Pork(0)','Predictions:Pork(1)'])\n", "t.index = ['Acutal: Not Pork(0)', 'Acutal: Pork (1)']\n", "t" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }