Cleaned Data Added With Visual File

This commit is contained in:
2022-11-18 14:01:27 +05:30
parent 6f94ca7525
commit a3b798e317
4 changed files with 728 additions and 99 deletions

View File

@@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -74,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -305,7 +305,7 @@
"9 0 "
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -324,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -359,7 +359,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -499,7 +499,7 @@
"max 291.050000 97.600000 1.000000 "
]
},
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -510,7 +510,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -542,7 +542,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -683,7 +683,7 @@
"4 0 "
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -696,7 +696,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -743,59 +743,59 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>39696</th>\n",
" <td>24427</td>\n",
" <th>25469</th>\n",
" <td>40932</td>\n",
" <td>1</td>\n",
" <td>63.0</td>\n",
" <td>0</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>73.20</td>\n",
" <td>26.4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>91.23</td>\n",
" <td>24.5</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9062</th>\n",
" <td>23897</td>\n",
" <td>1</td>\n",
" <td>4.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>86.33</td>\n",
" <td>28.7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38440</th>\n",
" <td>43933</td>\n",
" <td>0</td>\n",
" <td>57.0</td>\n",
" <th>23973</th>\n",
" <td>16201</td>\n",
" <td>1</td>\n",
" <td>48.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>59.41</td>\n",
" <td>34.9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37515</th>\n",
" <td>29824</td>\n",
" <td>1</td>\n",
" <td>34.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>104.07</td>\n",
" <td>45.9</td>\n",
" <td>80.87</td>\n",
" <td>19.8</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
@@ -809,37 +809,40 @@
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22940</th>\n",
" <td>16030</td>\n",
" <th>37135</th>\n",
" <td>57514</td>\n",
" <td>1</td>\n",
" <td>71.0</td>\n",
" <td>55.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>96.03</td>\n",
" <td>NaN</td>\n",
" <td>132.16</td>\n",
" <td>29.1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12098</th>\n",
" <td>72294</td>\n",
" <td>1</td>\n",
" <td>59.0</td>\n",
" <th>20314</th>\n",
" <td>12476</td>\n",
" <td>0</td>\n",
" <td>62.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>90.06</td>\n",
" <td>27.0</td>\n",
" <td>110.97</td>\n",
" <td>34.2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
@@ -848,9 +851,6 @@
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@@ -858,49 +858,49 @@
],
"text/plain": [
" id gender age hypertension heart_disease ever_married \\\n",
"39696 24427 0 20.0 0 0 0 \n",
"38440 43933 0 57.0 0 0 1 \n",
"37515 29824 1 34.0 0 0 1 \n",
"22940 16030 1 71.0 0 0 1 \n",
"12098 72294 1 59.0 0 0 1 \n",
"25469 40932 1 63.0 0 0 1 \n",
"9062 23897 1 4.0 0 0 0 \n",
"23973 16201 1 48.0 0 0 1 \n",
"37135 57514 1 55.0 0 0 1 \n",
"20314 12476 0 62.0 0 0 1 \n",
"\n",
" avg_glucose_level bmi stroke work_type_Govt_job \\\n",
"39696 91.23 24.5 0 1 \n",
"38440 59.41 34.9 0 1 \n",
"37515 104.07 45.9 0 0 \n",
"22940 96.03 NaN 0 0 \n",
"12098 90.06 27.0 0 0 \n",
"25469 73.20 26.4 0 0 \n",
"9062 86.33 28.7 0 0 \n",
"23973 80.87 19.8 0 0 \n",
"37135 132.16 29.1 0 1 \n",
"20314 110.97 34.2 0 1 \n",
"\n",
" work_type_Never_worked work_type_Private work_type_Self-employed \\\n",
"39696 0 0 0 \n",
"38440 0 0 0 \n",
"37515 0 1 0 \n",
"22940 0 1 0 \n",
"12098 0 0 1 \n",
"25469 0 0 1 \n",
"9062 0 0 0 \n",
"23973 0 1 0 \n",
"37135 0 0 0 \n",
"20314 0 0 0 \n",
"\n",
" work_type_children Residence_type_Rural Residence_type_Urban \\\n",
"39696 0 0 1 \n",
"38440 0 1 0 \n",
"37515 0 0 1 \n",
"22940 0 0 1 \n",
"12098 0 1 0 \n",
"25469 0 1 0 \n",
"9062 1 0 1 \n",
"23973 0 0 1 \n",
"37135 0 1 0 \n",
"20314 0 0 1 \n",
"\n",
" smoking_status_formerly smoked smoking_status_never smoked \\\n",
"39696 0 0 \n",
"38440 1 0 \n",
"37515 0 0 \n",
"22940 1 0 \n",
"12098 0 0 \n",
"25469 0 1 \n",
"9062 0 0 \n",
"23973 0 0 \n",
"37135 0 1 \n",
"20314 0 1 \n",
"\n",
" smoking_status_smokes \n",
"39696 0 \n",
"38440 0 \n",
"37515 1 \n",
"22940 0 \n",
"12098 0 "
"25469 0 \n",
"9062 0 \n",
"23973 1 \n",
"37135 0 \n",
"20314 0 "
]
},
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -912,7 +912,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -940,7 +940,7 @@
"dtype: int64"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -951,13 +951,22 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df = df.dropna(how = 'any', axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"df.to_excel('datasetCleaned.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 10,
@@ -4981,7 +4990,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.13 ('BrainStrokePredictionMLEnv')",
"display_name": "Python 3.8.13 ('StrokePredictionModel')",
"language": "python",
"name": "python3"
},
@@ -4995,12 +5004,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "54c59028773620d1ec7cf564885279046a3969c7da2b497982c8156e7da39d8c"
"hash": "6d6bab66b583e7661b89cead2220317a23c391a40fb8c52f2c1bcd3c04f3fbda"
}
}
},