{ "cells": [ { "cell_type": "code", "execution_count": 63, "id": "c22eadc2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OrgIDOrganizationChurn Date2022-012022-022022-032022-042022-052022-062022-072022-082022-092022-102022-112022-122023-012023-022023-032023-04
030867753Aquent2023-04583850503936464643.027.024.026228.010.02.0
133375202BioLife Solutions, Inc.2023-041174114NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
230867495ZyXel Communications Inc2023-04112124221.0NaNNaNNaNNaN1.01.0NaN
332999202BrightLine Eating Solutions LLC2023-0310,3627,8907,2728,1778,4687,5248,5095,638581.0NaNNaNNaNNaNNaNNaNNaN
430867752Casio America Inc2023-03NaNNaNNaNNaNNaN1NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " OrgID Organization Churn Date 2022-01 2022-02 \n", "0 30867753 Aquent 2023-04 58 38 \\\n", "1 33375202 BioLife Solutions, Inc. 2023-04 11 7 \n", "2 30867495 ZyXel Communications Inc 2023-04 1 1 \n", "3 32999202 BrightLine Eating Solutions LLC 2023-03 10,362 7,890 \n", "4 30867752 Casio America Inc 2023-03 NaN NaN \n", "\n", " 2022-03 2022-04 2022-05 2022-06 2022-07 2022-08 2022-09 2022-10 2022-11 \n", "0 50 50 39 36 46 46 43.0 27.0 24.0 \\\n", "1 4 11 4 NaN NaN NaN NaN NaN NaN \n", "2 2 1 2 4 2 2 1.0 NaN NaN \n", "3 7,272 8,177 8,468 7,524 8,509 5,638 581.0 NaN NaN \n", "4 NaN NaN NaN 1 NaN NaN NaN NaN NaN \n", "\n", " 2022-12 2023-01 2023-02 2023-03 2023-04 \n", "0 26 22 8.0 10.0 2.0 \n", "1 NaN NaN NaN NaN NaN \n", "2 NaN NaN 1.0 1.0 NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "from pathlib import Path\n",
 "\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "import sketch\n",
 "\n",
 "# Resolve the CSV export from the current user's home directory instead of\n",
 "# hard-coding one machine's absolute path (which also embeds a username).\n",
 "DATA_PATH = Path.home() / \"Downloads\" / \"churned_analysis.csv\"\n",
 "\n",
 "# Raw churn export: three identifier columns followed by one column per month.\n",
 "cd = pd.read_csv(DATA_PATH)\n",
 "cd.head()"
 ] }, { "cell_type": "code", "execution_count": 64, "id": "e477c8d4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OrgIDOrganizationChurn Date2022-012022-022022-032022-042022-052022-062022-072022-082022-092022-102022-112022-122023-012023-022023-032023-04
030867753Aquent2023-04583850503936464643272426228102
133375202BioLife Solutions Inc.2023-04117411400000000000
230867495ZyXel Communications Inc2023-041121242210000110
332999202BrightLine Eating Solutions LLC2023-031036278907272817784687524850956385810000000
430867752Casio America Inc2023-030000010000000000
\n", "
" ], "text/plain": [ " OrgID Organization Churn Date 2022-01 2022-02 \n", "0 30867753 Aquent 2023-04 58 38 \\\n", "1 33375202 BioLife Solutions Inc. 2023-04 11 7 \n", "2 30867495 ZyXel Communications Inc 2023-04 1 1 \n", "3 32999202 BrightLine Eating Solutions LLC 2023-03 10362 7890 \n", "4 30867752 Casio America Inc 2023-03 0 0 \n", "\n", " 2022-03 2022-04 2022-05 2022-06 2022-07 2022-08 2022-09 2022-10 \n", "0 50 50 39 36 46 46 43 27 \\\n", "1 4 11 4 0 0 0 0 0 \n", "2 2 1 2 4 2 2 1 0 \n", "3 7272 8177 8468 7524 8509 5638 581 0 \n", "4 0 0 0 1 0 0 0 0 \n", "\n", " 2022-11 2022-12 2023-01 2023-02 2023-03 2023-04 \n", "0 24 26 22 8 10 2 \n", "1 0 0 0 0 0 0 \n", "2 0 0 0 1 1 0 \n", "3 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 " ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Month columns are everything after OrgID, Organization and Churn Date.\n",
 "# Slice from position 3 so the first month ('2022-01') is included.\n",
 "month_cols = cd.columns[3:]\n",
 "\n",
 "# Strip thousands separators from the month columns only, so organization\n",
 "# names keep their commas, then parse every month as a number.\n",
 "monthly = cd[month_cols].replace(',', '', regex=True).apply(pd.to_numeric)\n",
 "\n",
 "# Missing or infinite counts become 0; months are stored as integers.\n",
 "monthly = monthly.replace([np.inf, -np.inf], np.nan).fillna(0).astype(int)\n",
 "\n",
 "# Rebuild the frame: identifier columns first, cleaned month columns after.\n",
 "# Re-running this cell is safe because already-clean months pass through unchanged.\n",
 "cd = cd.drop(columns=month_cols).join(monthly)\n",
 "\n",
 "cd.head()"
 ] }, { "cell_type": "code", "execution_count": 65, "id": "06f9cb22", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2022-012022-022022-032022-042022-052022-062022-072022-082022-092022-102022-112022-122023-012023-022023-032023-04
0583850503936464643272426228102
1117411400000000000
21121242210000110
31036278907272817784687524850956385810000000
40000010000000000
58119512626327968352333257571294
600000410000193000
738637839673294776212038894381781152149216500
80112754566000000
96315104823044655200
1028277917138010112000
1123719213824621351071000
1201422217000020200
13381677583671515380181449120111000
147217758256065785014975436285056501270195719000
15055208743401090100
16276628952633237822632143206058700000000
170000000000000000
180122012021201000
1991833885132814156136200
2012615813664759683633813101413300
214210000000001000
224044243340000000000000
231310637212585482200
240000000000000110
252019724923213815410234400
2629579122043810715991281100
\n", "
" ], "text/plain": [ " 2022-01 2022-02 2022-03 2022-04 2022-05 2022-06 2022-07 2022-08 \n", "0 58 38 50 50 39 36 46 46 \\\n", "1 11 7 4 11 4 0 0 0 \n", "2 1 1 2 1 2 4 2 2 \n", "3 10362 7890 7272 8177 8468 7524 8509 5638 \n", "4 0 0 0 0 0 1 0 0 \n", "5 81 19 51 26 26 32 79 68 \n", "6 0 0 0 0 0 4 1 0 \n", "7 386 378 396 732 947 762 1203 889 \n", "8 0 1 1 2 7 5 4 5 \n", "9 63 15 10 4 8 2 3 0 \n", "10 28 27 79 17 1 3 8 0 \n", "11 2 37 19 2 1 38 24 6 \n", "12 0 1 4 22 2 1 7 0 \n", "13 381 677 583 671 515 380 181 449 \n", "14 721 775 825 606 578 501 497 543 \n", "15 0 5 5 20 8 7 4 3 \n", "16 2766 2895 2633 2378 2263 2143 2060 587 \n", "17 0 0 0 0 0 0 0 0 \n", "18 0 1 2 2 0 1 2 0 \n", "19 9 18 33 8 8 5 13 28 \n", "20 126 158 136 64 75 96 83 63 \n", "21 4 2 1 0 0 0 0 0 \n", "22 404 424 334 0 0 0 0 0 \n", "23 13 10 6 3 7 21 2 5 \n", "24 0 0 0 0 0 0 0 0 \n", "25 20 197 24 9 23 21 38 15 \n", "26 29 5 7 9 12 20 4 38 \n", "\n", " 2022-09 2022-10 2022-11 2022-12 2023-01 2023-02 2023-03 2023-04 \n", "0 43 27 24 26 22 8 10 2 \n", "1 0 0 0 0 0 0 0 0 \n", "2 1 0 0 0 0 1 1 0 \n", "3 581 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 \n", "5 35 23 33 25 75 71 29 4 \n", "6 0 0 0 19 3 0 0 0 \n", "7 438 178 115 214 92 165 0 0 \n", "8 6 6 0 0 0 0 0 0 \n", "9 4 4 6 5 5 2 0 0 \n", "10 1 0 1 1 2 0 0 0 \n", "11 21 35 10 7 1 0 0 0 \n", "12 0 0 0 2 0 2 0 0 \n", "13 12 0 1 1 1 0 0 0 \n", "14 628 505 650 1270 1957 190 0 0 \n", "15 4 0 10 9 0 1 0 0 \n", "16 0 0 0 0 0 0 0 0 \n", "17 0 0 0 0 0 0 0 0 \n", "18 2 1 2 0 1 0 0 0 \n", "19 14 15 6 13 6 2 0 0 \n", "20 38 13 10 14 13 3 0 0 \n", "21 0 0 0 0 1 0 0 0 \n", "22 0 0 0 0 0 0 0 0 \n", "23 8 5 4 8 2 2 0 0 \n", "24 0 0 0 0 0 1 1 0 \n", "25 4 10 2 3 4 4 0 0 \n", "26 10 7 15 99 128 11 0 0 " ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Build the frame to be scaled: every column except the three identifier columns.\n",
 "# drop() returns a new DataFrame, so cd itself is left unchanged.\n",
 "cd_scaled = cd.drop(columns=['Organization', 'Churn Date', 'OrgID'])\n",
 "cd_scaled.head(100)"
 ] }, { 
"cell_type": "code", "execution_count": null, "id": "af57bdc4", "metadata": {}, "outputs": [], "source": [
 "# Min-max scale each organization's monthly counts to [0, 1] along its own row.\n",
 "# Every remaining column is a month, so all of them take part in the min/max.\n",
 "# Cast to float first: upstream cleaning may leave a month column as strings.\n",
 "monthly = cd_scaled.astype(float)\n",
 "row_min = monthly.min(axis=1)\n",
 "row_span = monthly.max(axis=1) - row_min\n",
 "\n",
 "# A flat row (for example all zeros) has no spread. Mask its span to NaN so the\n",
 "# division never hits zero, then report such rows as 0.0 instead of failing.\n",
 "cd_scaled = (\n",
 "    monthly.sub(row_min, axis=0)\n",
 "    .div(row_span.where(row_span != 0), axis=0)\n",
 "    .fillna(0.0)\n",
 ")\n",
 "\n",
 "cd_scaled.head()"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" } }, "nbformat": 4, 
"nbformat_minor": 5 }