Azarthehulk
/

Decision_tree

Scikit-learn

English

biology

Model card Files Files and versions

xet

Community

Azarthehulk committed on Mar 2, 2023

Commit

0b73a08

1 Parent(s): 467a62c

Upload 21075A6603-DecisioN_TREE.ipynb

Browse files

Files changed (1) hide show

21075A6603-DecisioN_TREE.ipynb +263 -0

21075A6603-DecisioN_TREE.ipynb ADDED Viewed

	@@ -0,0 +1,263 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ab540ee7",
+   "metadata": {},
+   "source": [
+    "# Decision Tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "92d3ce84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import confusion_matrix\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.datasets import load_iris\n",
+    "iris=load_iris()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "dd4c544d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Take the feature array (X) and the target array (y) from the loaded iris dataset.\n",
+    "X,y=iris.data,iris.target"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "abe99084",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_using_gini(X_train, y_train, *, max_depth=3, min_samples_leaf=4, random_state=100):\n",
+    "    \"\"\"Fit and return a decision tree that splits on the Gini criterion.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    X_train : training features, passed to ``fit``.\n",
+    "    y_train : training targets, passed to ``fit``.\n",
+    "    max_depth : forwarded to ``DecisionTreeClassifier`` (default 3).\n",
+    "    min_samples_leaf : forwarded to ``DecisionTreeClassifier`` (default 4).\n",
+    "    random_state : forwarded to ``DecisionTreeClassifier`` (default 100).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    The fitted ``DecisionTreeClassifier``.\n",
+    "\n",
+    "    The hyperparameters are keyword-only arguments so that experiments with a\n",
+    "    different depth or leaf size can reuse this helper instead of redefining it.\n",
+    "    \"\"\"\n",
+    "    clf_gini = DecisionTreeClassifier(\n",
+    "        criterion=\"gini\",\n",
+    "        random_state=random_state,\n",
+    "        max_depth=max_depth,\n",
+    "        min_samples_leaf=min_samples_leaf,\n",
+    "    )\n",
+    "    clf_gini.fit(X_train, y_train)\n",
+    "    return clf_gini"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3e9ddda5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Using Entropy\n",
+    "def train_using_entropy(X_train, y_train, *, max_depth=3, min_samples_leaf=4, random_state=100):\n",
+    "    \"\"\"Fit and return a decision tree that splits on the entropy criterion.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    X_train : training features, passed to ``fit``.\n",
+    "    y_train : training targets, passed to ``fit``.\n",
+    "    max_depth : forwarded to ``DecisionTreeClassifier`` (default 3).\n",
+    "    min_samples_leaf : forwarded to ``DecisionTreeClassifier`` (default 4).\n",
+    "    random_state : forwarded to ``DecisionTreeClassifier`` (default 100).\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    The fitted ``DecisionTreeClassifier``.\n",
+    "    \"\"\"\n",
+    "    # Create the classifier object\n",
+    "    clf_entropy = DecisionTreeClassifier(\n",
+    "        criterion=\"entropy\",\n",
+    "        random_state=random_state,\n",
+    "        max_depth=max_depth,\n",
+    "        min_samples_leaf=min_samples_leaf,\n",
+    "    )\n",
+    "    # Train it on the supplied data\n",
+    "    clf_entropy.fit(X_train, y_train)\n",
+    "    return clf_entropy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "74fd9b39",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prediction(X_test, clf_object):\n",
+    "    \"\"\"Call ``clf_object.predict`` on ``X_test``, print the result and return it.\"\"\"\n",
+    "    predicted = clf_object.predict(X_test)\n",
+    "    print(\"Predicted values:\", predicted)\n",
+    "    return predicted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "0b47818b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Report classification metrics for a set of predictions\n",
+    "def cal_accuracy(y_test, y_pred):\n",
+    "    \"\"\"Print the confusion matrix, the accuracy as a percentage and the classification report.\n",
+    "\n",
+    "    ``y_test`` holds the true labels and ``y_pred`` the predicted ones; both are\n",
+    "    handed to the scikit-learn metric functions unchanged. Nothing is returned.\n",
+    "    \"\"\"\n",
+    "    matrix = confusion_matrix(y_test, y_pred)\n",
+    "    print(\"Confusion Matrix: \", matrix)\n",
+    "    accuracy_pct = accuracy_score(y_test, y_pred) * 100\n",
+    "    print(\"Accuracy:\", accuracy_pct)\n",
+    "    report = classification_report(y_test, y_pred)\n",
+    "    print(\"Report :\", report)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "0f94ba7d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dimensions for training data (105, 4)\n",
+      "Dimensions for testing data (45, 4)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Hold out 30% of the samples for testing; the fixed random_state keeps the split reproducible.\n",
+    "X_train, X_test, y_train, y_test = train_test_split( X, y, test_size = 0.3, random_state = 100)\n",
+    "print(\"Dimensions for training data\",X_train.shape)\n",
+    "# Show the test features here: y_train.shape only repeated the training row count.\n",
+    "print(\"Dimensions for testing data\",X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "a7ed365c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Results Using Gini Index:\n",
+      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
+      " 1 2 2 0 1 2 2 0]\n",
+      "Confusion Matrix:  [[16  0  0]\n",
+      " [ 0 10  1]\n",
+      " [ 0  1 17]]\n",
+      "Accuracy: 95.55555555555556\n",
+      "Report :               precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      1.00      1.00        16\n",
+      "           1       0.91      0.91      0.91        11\n",
+      "           2       0.94      0.94      0.94        18\n",
+      "\n",
+      "    accuracy                           0.96        45\n",
+      "   macro avg       0.95      0.95      0.95        45\n",
+      "weighted avg       0.96      0.96      0.96        45\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Gini index: fit the tree on the training split.\n",
+    "clf_gini = train_using_gini(X_train, y_train)\n",
+    "print(\"Results Using Gini Index:\")\n",
+    "# Predict on the held-out split (prediction() also prints the labels), then print the metrics.\n",
+    "y_pred_gini = prediction(X_test, clf_gini)\n",
+    "cal_accuracy(y_test, y_pred_gini)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "0cd3759c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
+      " 1 2 2 0 1 2 2 0]\n",
+      "Confusion Matrix:  [[16  0  0]\n",
+      " [ 0 10  1]\n",
+      " [ 0  1 17]]\n",
+      "Accuracy: 95.55555555555556\n",
+      "Report :               precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      1.00      1.00        16\n",
+      "           1       0.91      0.91      0.91        11\n",
+      "           2       0.94      0.94      0.94        18\n",
+      "\n",
+      "    accuracy                           0.96        45\n",
+      "   macro avg       0.95      0.95      0.95        45\n",
+      "weighted avg       0.96      0.96      0.96        45\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Entropy criterion: fit the tree on the training split.\n",
+    "# NOTE(review): unlike the Gini cell, no \"Results Using ...\" header is printed here - confirm whether one is wanted.\n",
+    "clf_entropy = train_using_entropy(X_train,y_train)\n",
+    "# Predict on the held-out split (prediction() also prints the labels), then print the metrics.\n",
+    "y_pred_entropy = prediction(X_test, clf_entropy)\n",
+    "cal_accuracy(y_test, y_pred_entropy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "bfb36a8a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Results Using Gini Index:\n",
+      "Predicted values: [2 0 2 0 2 2 0 0 2 0 0 2 0 0 2 1 1 2 2 2 2 0 2 0 1 2 1 0 1 2 1 1 1 0 0 1 0\n",
+      " 1 2 2 0 1 2 2 0]\n",
+      "Confusion Matrix:  [[16  0  0]\n",
+      " [ 0 10  1]\n",
+      " [ 0  1 17]]\n",
+      "Accuracy: 95.55555555555556\n",
+      "Report :               precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      1.00      1.00        16\n",
+      "           1       0.91      0.91      0.91        11\n",
+      "           2       0.94      0.94      0.94        18\n",
+      "\n",
+      "    accuracy                           0.96        45\n",
+      "   macro avg       0.95      0.95      0.95        45\n",
+      "weighted avg       0.96      0.96      0.96        45\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Observe the result with different hyperparameters: max_depth=5, min_samples_leaf=3, random_state=150.\n",
+    "# The classifier is built inline instead of redefining train_using_gini, so the helper\n",
+    "# defined earlier is not silently replaced when earlier cells are re-run.\n",
+    "clf_gini = DecisionTreeClassifier(criterion=\"gini\", random_state=150, max_depth=5, min_samples_leaf=3)\n",
+    "clf_gini.fit(X_train, y_train)\n",
+    "print(\"Results Using Gini Index:\")\n",
+    "# Prediction using gini\n",
+    "y_pred_gini = prediction(X_test, clf_gini)\n",
+    "cal_accuracy(y_test, y_pred_gini)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ec89b9d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}