2023-04-12 08:36:25 -04:00
|
|
|
|
{
|
|
|
|
|
|
"cells": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 14,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"import pandas as pd \n",
|
|
|
|
|
|
"import numpy as np \n",
|
|
|
|
|
|
"import seaborn as sns \n",
|
|
|
|
|
|
"import matplotlib.pylab as plt \n",
|
|
|
|
|
|
"import sketch\n",
|
|
|
|
|
|
"plt.style.use('ggplot')"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 15,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"pages = pd.read_csv(\"/Users/normrasmussen/Downloads/app_usage.csv\")"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 16,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"(59433, 3)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 16,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"pages.shape"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 17,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
"<div>\n",
|
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"</style>\n",
|
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
|
" <th></th>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>School Name</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <th>Path</th>\n",
|
|
|
|
|
|
" <th>Mar 13 - Apr 11</th>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/courses</td>\n",
|
|
|
|
|
|
" <td>25</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/previews</td>\n",
|
|
|
|
|
|
" <td>16</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/account_links</td>\n",
|
|
|
|
|
|
" <td>11</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/analytics</td>\n",
|
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/settings/general/edit</td>\n",
|
|
|
|
|
|
" <td>7</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/apps/integrations</td>\n",
|
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/accounts/general/edit</td>\n",
|
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/people</td>\n",
|
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/settings/authentication</td>\n",
|
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
|
" <td>Vivvix University for Ad Intel</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>/styling/general/edit</td>\n",
|
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
|
"</table>\n",
|
|
|
|
|
|
"</div>"
|
|
|
|
|
|
],
|
|
|
|
|
|
"text/plain": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" School Name Path Mar 13 - Apr 11\n",
|
|
|
|
|
|
"0 Vivvix University for Ad Intel /courses 25\n",
|
|
|
|
|
|
"1 Vivvix University for Ad Intel /previews 16\n",
|
|
|
|
|
|
"2 Vivvix University for Ad Intel /account_links 11\n",
|
|
|
|
|
|
"3 Vivvix University for Ad Intel /analytics 8\n",
|
|
|
|
|
|
"4 Vivvix University for Ad Intel /settings/general/edit 7\n",
|
|
|
|
|
|
"5 Vivvix University for Ad Intel /apps/integrations 4\n",
|
|
|
|
|
|
"6 Vivvix University for Ad Intel /accounts/general/edit 4\n",
|
|
|
|
|
|
"7 Vivvix University for Ad Intel /people 3\n",
|
|
|
|
|
|
"8 Vivvix University for Ad Intel /settings/authentication 3\n",
|
|
|
|
|
|
"9 Vivvix University for Ad Intel /styling/general/edit 2"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 17,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"pages.head(10)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 18,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"outputs": [],
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"source": [
|
|
|
|
|
|
"pages_sum = pd.DataFrame(columns=['Total Views'])\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"pages_sum['Total Views'] = pages.groupby(by='School Name')['Mar 13 - Apr 11'].sum()"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 19,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
"<div>\n",
|
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"</style>\n",
|
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
|
" <th>Total Views</th>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>School Name</th>\n",
|
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>Vivvix for Ad Intel</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>110</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>'@properties</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>322</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>4 Pillars</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>112</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>AIIM+</th>\n",
|
|
|
|
|
|
" <td>528</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>ASAPP</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" <td>153</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>| AgencyBloc</th>\n",
|
|
|
|
|
|
" <td>3373</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>Académie Évolupharma</th>\n",
|
|
|
|
|
|
" <td>253</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>Access</th>\n",
|
|
|
|
|
|
" <td>11</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>ActiveLearner</th>\n",
|
|
|
|
|
|
" <td>1907</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>Ad Intel</th>\n",
|
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" </tbody>\n",
|
|
|
|
|
|
"</table>\n",
|
|
|
|
|
|
"</div>"
|
|
|
|
|
|
],
|
|
|
|
|
|
"text/plain": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" Total Views\n",
|
|
|
|
|
|
"School Name \n",
|
|
|
|
|
|
" Vivvix for Ad Intel 110\n",
|
|
|
|
|
|
"'@properties 322\n",
|
|
|
|
|
|
"4 Pillars 112\n",
|
|
|
|
|
|
"AIIM+ 528\n",
|
|
|
|
|
|
"ASAPP 153\n",
|
|
|
|
|
|
" | AgencyBloc 3373\n",
|
|
|
|
|
|
"Académie Évolupharma 253\n",
|
|
|
|
|
|
"Access 11\n",
|
|
|
|
|
|
"ActiveLearner 1907\n",
|
|
|
|
|
|
"Ad Intel 2"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 19,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"to_remove = ['Academy', 'academy', 'University', 'university', 'school', 'School', 'Sandbox', 'Knowledge Base']\n",
|
|
|
|
|
|
"for item in pages_sum.itertuples(index=True):\n",
|
|
|
|
|
|
" for word in to_remove:\n",
|
|
|
|
|
|
" if word in item[0]:\n",
|
|
|
|
|
|
" less = item[0].replace(word, '')\n",
|
|
|
|
|
|
" pages_sum.rename(index={item[0]:less}, inplace=True)\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"pages_sum.head(10)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 20,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"pages_sum.index = pages_sum.index.str.strip()"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 21,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"{'Total Views': {'Vivvix for Ad Intel': 110, \"'@properties\": 322, '4 Pillars': 112, 'AIIM+': 528, 'ASAPP': 153, '| AgencyBloc': 3373, 'Académie Évolupharma': 253, 'Access': 11, 'ActiveLearner': 1907, 'Ad Intel': 765, 'AdCellerant': 5, 'Adkins': 107, 'Agility': 156, 'AgriWebb': 567, 'AgriWebb Brazil': 333, 'Akua Mind Body': 578, 'AlphaSense': 17, 'Altenew': 859, 'Altima Dental': 33, 'American Concrete Pipe Association Learning Center': 2864, 'AnChain.AI': 437, 'AndHealth's': 2628, 'Andhealth Internal Training': 17, 'Ansley': 116, 'Anthology': 13, 'Anthology Internal': 104, 'Anthology Trial': 418, 'Aquent's Learning Lab': 16, 'Arrcus': 8, 'Artera': 54, 'Ascend Training by Stenograph': 9, 'AtlasIED': 125, 'Atlassian': 41, 'AutoServe1': 4, 'Autoserve1 New': 1, 'BCNDP Caucus': 26, 'BLVD Mentor Learning Library': 223, 'BNY Mellon OMNI Digital': 364, 'Bartosz's': 3, 'Bellhop': 118, 'Belmont Farm Nursery': 29, 'BenchSci': 1659, 'Big Ideas Learning': 167, 'Big Ideas Learning Sandbox': 7, 'BirchStreet': 11, 'BirchStreet Trial': 446, 'Bisla': 402, 'Blacklane Chauffeur Learning': 263, 'Blacklane Dubai Chauffeurs': 272, 'Blacklane Learning & Development': 1913, 'Blackstone Industrial': 42, 'Bolt': 1177, 'BombBomb Studios': 673, 'Bonfire': 19, 'Boon Edam': 164, 'Brand Addition's': 457, 'Brian's Starfleet': 3, 'Brighton Science': 1703, 'Bungii Operations': 155, 'CATalyst': 161, 'CHEK Institute': 11138, 'CIBC FirstCaribbean Online and Mobile Banking': 5, 'CIRE': 149, 'CMIC': 200, 'CONCEPT ACADEMY': 468, 'CORE College': 1419, 'CRM Training Courses and Tutorials | Pipedrive Learn': 984, 'CSNews Technology': 1, 'CTAA's': 1947, 'Canavan Byrne': 1380, 'Capillary': 1473, 'Cappfinity': 108, 'Capstone Scholars Program': 456, 'Cayuse': 99, 'Censis': 1300, 'Channable Internal': 551, 'Channable': 40, 'Chann': 152, 'Chico Electric': 135, 'Cholodecki current': 1, 'Christian': 5, 'Christie's International Real Estate pl@tform': 273, 'ChurnZero': 41, 'Clay Clerk': 525, 'CloudRadial': 455, 'Cloudticity': 27, 'Cluid Housing': 65, 'Coastal Consulting': 114, 'Cofense': 129, 'Community Fire Prevention': 37, 'Compass': 4327, 'Compass U': 265, 'Competitive Advantage': 48, 'Con-Trol Training System': 816, 'Con-Trol Training System - BambooHR': 179, 'Conlon's': 598, 'Cook Brother's Bars': 361, 'Corporate Traveler': 3, 'Curso de Formação': 93, 'CurveBeam': 87, 'DELETE - Airbnb Host Hub': 12, 'DELETE - Airbnb Photography': 13, 'DELETE - BLE Dev': 14, 'DELETE - Bright Line Hub': 24, 'DELETE - Bright Liners Hub': 1020, 'DELETE - Dealerware': 14, 'DELETE - Dynamic Yield': 14, 'DELETE - Gett': 13, 'DELETE - H1 Insights Customer': 21, 'DELETE - H1 Insights Inc': 47, 'DELETE - Happy Culture': 15, 'DELETE - Numerator for Promotions Intel': 15, 'DELETE - OTHRSource': 16, 'DELETE - PeopleDev': 14, 'DELETE - Pixability's': 1, 'DELETE - SANDBOX Bright Liners Hub': 28, 'DELETE - The GLAMSQUAD Online Learning Center': 16, 'DELETE - Uber Compliance El Salvador': 14, 'DELETE - Uber Ecuador': 13, 'DELETE - Uber Peru': 19, 'DELETE - Uber Peru Compliance': 14, 'DELETE - Yotpo': 13, 'DV Learning': 601, 'Datawords': 221, 'Dell AMD Learning': 1192, 'Dept Agency': 575, 'Desktop Shipper': 875, 'Devils': 9, 'Dolly': 23, 'DoorDash Shop & Deliver': 10, 'DoorDash's': 592, 'DoubleVerify': 89, 'Douglas Elliman MyLearning.Elliman.com': 943, 'Doximity Compliance Learning': 27, 'Doximity': 183, 'Dray Alliance': 207, 'EASE': 373, 'EF High Exchange Year: IEC': 1703, 'EF High Exchange Year: Staff': 6, 'EF host family resource center': 29, 'EF student learning center': 74, 'ERA Key Realty Services': 66, 'ES&E': 329, 'EXOR & CORVINA': 189, 'ElderSource Ecommerce': 102, 'ElderSource Internal': 16, 'ElderSource Partner': 4, 'Election Protection': 39, 'Embee': 4, 'Encore Healthcare': 290, 'Energy Foundation': 490, 'Engel & Völkers': 566, 'English for Asia': 2270, 'Equip By Equiem': 143, 'Essentium Training Center': 11, 'Everlaw Certification Center': 33, 'Evolve IP': 426, 'Excalibur Data Systems Online Learning': 123,
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"test_dict = pages_sum.to_dict()\n",
|
|
|
|
|
|
"print(test_dict)"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 22,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/html": [
|
|
|
|
|
|
"<div>\n",
|
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
|
" }\n",
|
|
|
|
|
|
"</style>\n",
|
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
|
" <th></th>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>Twotal</th>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>School Name</th>\n",
|
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>'@properties</th>\n",
|
|
|
|
|
|
" <td>322</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>4 Pillars</th>\n",
|
|
|
|
|
|
" <td>112</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>AIIM+</th>\n",
|
|
|
|
|
|
" <td>528</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>ASAPP</th>\n",
|
|
|
|
|
|
" <td>153</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>Académie Évolupharma</th>\n",
|
|
|
|
|
|
" <td>253</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
|
" <td>...</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>Yakademy</th>\n",
|
|
|
|
|
|
" <td>1</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" <th>Your Pathway Home</th>\n",
|
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>ZENOTI</th>\n",
|
|
|
|
|
|
" <td>42</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>ZU EWC</th>\n",
|
|
|
|
|
|
" <td>14</td>\n",
|
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
|
" <th>ZU Sorbet</th>\n",
|
|
|
|
|
|
" <td>8</td>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
|
"</table>\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"<p>489 rows × 1 columns</p>\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"</div>"
|
|
|
|
|
|
],
|
|
|
|
|
|
"text/plain": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
" Twotal\n",
|
|
|
|
|
|
"School Name \n",
|
|
|
|
|
|
"'@properties 322\n",
|
|
|
|
|
|
"4 Pillars 112\n",
|
|
|
|
|
|
"AIIM+ 528\n",
|
|
|
|
|
|
"ASAPP 153\n",
|
|
|
|
|
|
"Académie Évolupharma 253\n",
|
|
|
|
|
|
"... ...\n",
|
|
|
|
|
|
"Yakademy 1\n",
|
|
|
|
|
|
"Your Pathway Home 3\n",
|
|
|
|
|
|
"ZENOTI 42\n",
|
|
|
|
|
|
"ZU EWC 14\n",
|
|
|
|
|
|
"ZU Sorbet 8\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"[489 rows x 1 columns]"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 22,
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"pages_three = pd.DataFrame(columns=['Twotal'])\n",
|
|
|
|
|
|
"pages_three['Twotal'] = pages_sum.groupby(['School Name'])['Total Views'].sum()\n",
|
|
|
|
|
|
"pages_three.head(-20)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 23,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"pages_three.loc[pages_three.index == 'Zenjob']\n",
|
|
|
|
|
|
"acct_list = pages_three.index.to_list()"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 24,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"Bolt Volt\n",
|
|
|
|
|
|
"Landis LandisU\n",
|
|
|
|
|
|
"Ocean Oceana\n",
|
|
|
|
|
|
"Skuad Skool Skuid Skool\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"from Levenshtein import distance as lev\n",
|
|
|
|
|
|
"import itertools\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"acct1 = []\n",
|
|
|
|
|
|
"comp = []\n",
|
|
|
|
|
|
"for (\n",
|
|
|
|
|
|
" name1,\n",
|
|
|
|
|
|
" name2,\n",
|
|
|
|
|
|
") in itertools.combinations(acct_list, 2):\n",
|
|
|
|
|
|
" # print(name1, name2) - prints all pairs, working so far.\n",
|
|
|
|
|
|
" distance = lev(name1, name2)\n",
|
|
|
|
|
|
" # print(distance) - successfully returns numbers\n",
|
|
|
|
|
|
" if distance > 0 and distance < 2:\n",
|
|
|
|
|
|
" print(name1, name2)\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 25,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"Stored 'pages' (DataFrame)\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"pages = pages_three\n",
|
|
|
|
|
|
"%store pages"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 26,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"arr = pd.read_csv(\"/Users/normrasmussen/Downloads/cust_arr.csv\", index_col=0, header=[0])\n",
|
|
|
|
|
|
"arr.fillna(0, inplace=True)\n",
|
|
|
|
|
|
"arr.replace(',','', regex=True, inplace=True)\n",
|
|
|
|
|
|
"arr['ARR'] = arr['ARR'].astype(int)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 27,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"arr = arr.drop(axis=1, columns=['Renewaldate Date', 'CSM'])"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 28,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"data": {
|
|
|
|
|
|
"text/plain": [
|
|
|
|
|
|
"(354, 1)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 28,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"arr.shape"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 29,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"['Stuart']\n",
|
|
|
|
|
|
"['HelloTech', 'Inc.']\n",
|
|
|
|
|
|
"['Shopify', 'Inc.']\n",
|
|
|
|
|
|
"['Hardskills', 'Pte.', 'Ltd']\n",
|
|
|
|
|
|
"['Shipt']\n",
|
|
|
|
|
|
"['SPS', 'Commerce']\n",
|
|
|
|
|
|
"['CDP']\n",
|
|
|
|
|
|
"['Altenew']\n",
|
|
|
|
|
|
"['BrightMove']\n",
|
|
|
|
|
|
"['SITA', 'B.V.', 'c/o', 'Malta', 'International', 'Airport']\n",
|
|
|
|
|
|
"['Football', 'Marketing', 'Asia']\n",
|
|
|
|
|
|
"['Prism', 'Maritime']\n",
|
|
|
|
|
|
"['Skuid']\n",
|
|
|
|
|
|
"['SmartPM', 'Technologies']\n",
|
|
|
|
|
|
"['DoorDash,', 'Inc.']\n",
|
|
|
|
|
|
"['International', 'Care', 'Ltd.']\n",
|
|
|
|
|
|
"['BrightMove']\n",
|
|
|
|
|
|
"['Epiphany', 'Healthcare']\n",
|
|
|
|
|
|
"['Artsy']\n",
|
|
|
|
|
|
"['Évolupharma,', 'Inc.']\n",
|
|
|
|
|
|
"['UAB', 'Omnisend']\n",
|
|
|
|
|
|
"['Epiphany', 'Healthcare']\n",
|
|
|
|
|
|
"['Artsy']\n",
|
|
|
|
|
|
"['Renu', 'Contracting', 'Restoration']\n",
|
|
|
|
|
|
"['Minga']\n",
|
|
|
|
|
|
"['Renu', 'Contracting', 'Restoration']\n",
|
|
|
|
|
|
"['Viva', 'Online', 'Services', 'SA']\n",
|
|
|
|
|
|
"['Health', 'Current']\n",
|
|
|
|
|
|
"['Cherokee', 'Enterprises,', 'Inc.']\n",
|
|
|
|
|
|
"['Kinetica']\n",
|
|
|
|
|
|
"['Innovaccer', 'Inc.']\n",
|
|
|
|
|
|
"['Bellhop,', 'Inc']\n",
|
|
|
|
|
|
"['ISOutsource']\n",
|
|
|
|
|
|
"['Safic-Alcan']\n",
|
|
|
|
|
|
"['PetalMD']\n",
|
|
|
|
|
|
"['GomezLee', 'Marketing']\n",
|
|
|
|
|
|
"['PSC', 'Consulting']\n",
|
|
|
|
|
|
"['STATCARE', 'URGENT', 'CARE']\n",
|
|
|
|
|
|
"['Tru', 'Realty']\n",
|
|
|
|
|
|
"['Netradyne']\n",
|
|
|
|
|
|
"['H1', 'Insights,', 'Inc.']\n",
|
|
|
|
|
|
"['Hardskills', 'Pte.', 'Ltd']\n",
|
|
|
|
|
|
"['BambooHR']\n",
|
|
|
|
|
|
"['Velocity', 'Risk', 'Underwriters,', 'LLC']\n",
|
|
|
|
|
|
"['Harri', 'LLC']\n",
|
|
|
|
|
|
"['Rootstrap']\n",
|
|
|
|
|
|
"['First', 'Mile']\n",
|
|
|
|
|
|
"['Lawpath']\n",
|
|
|
|
|
|
"['Brand', 'Addition']\n",
|
|
|
|
|
|
"['Roboflow']\n",
|
|
|
|
|
|
"['Oiltanking', 'GmbH']\n",
|
|
|
|
|
|
"['Safari', 'Energy']\n",
|
|
|
|
|
|
"['Larson', 'Texts']\n",
|
|
|
|
|
|
"['Spring', 'Point']\n",
|
|
|
|
|
|
"['Legislative', 'Assembly', 'of', 'British', 'Columbia']\n",
|
|
|
|
|
|
"['National', 'Campus', 'and', 'Community', 'Radio', 'Association']\n",
|
|
|
|
|
|
"['Psynapse', 'Psychometrics', 'Pty', 'Ltd']\n",
|
|
|
|
|
|
"['Portnoy,', 'Messinger,', 'Pearl', '&', 'Associates,', 'Inc.']\n",
|
|
|
|
|
|
"['Trax', 'Technology', 'Solutions', 'Pte', 'Ltd']\n",
|
|
|
|
|
|
"['Uber', 'London', 'Limited', '(POOL)']\n",
|
|
|
|
|
|
"['Ansan', 'Industries', 'Ltd.']\n",
|
|
|
|
|
|
"['connectRN', 'Inc.']\n",
|
|
|
|
|
|
"['Usdan', 'Summer', 'Camp', 'for', 'the', 'Arts']\n",
|
|
|
|
|
|
"['Carlson', 'Capital', 'Management']\n",
|
|
|
|
|
|
"['Safari', 'Energy']\n",
|
|
|
|
|
|
"['Surmesur']\n",
|
|
|
|
|
|
"['connectRN', 'Inc.']\n",
|
|
|
|
|
|
"['ManyChat,', 'Inc.']\n",
|
|
|
|
|
|
"['Service', 'Coordination', 'Unlimited']\n",
|
|
|
|
|
|
"['Doximity', 'Inc.']\n",
|
|
|
|
|
|
"['Starwood', 'Pet', 'Travel']\n",
|
|
|
|
|
|
"['Future', 'Point', 'of', 'View', '(Tri-Corps)']\n",
|
|
|
|
|
|
"['Form', 'Energy']\n",
|
|
|
|
|
|
"['Channable']\n",
|
|
|
|
|
|
"['Hammer', 'Nutrition']\n",
|
|
|
|
|
|
"['Uber', 'B.V.', '-', 'Uber', 'LatAm', 'Compliance']\n",
|
|
|
|
|
|
"['Securonix', 'Inc.']\n",
|
|
|
|
|
|
"['HopSkipDrive']\n",
|
|
|
|
|
|
"['Kiwi']\n",
|
|
|
|
|
|
"['People', 'Untapped']\n",
|
|
|
|
|
|
"['beqom']\n",
|
|
|
|
|
|
"['Casio', 'America', 'Inc']\n",
|
|
|
|
|
|
"['Riviera', 'Partners', 'LLC']\n",
|
|
|
|
|
|
"['The', 'Latimer', 'Group']\n",
|
|
|
|
|
|
"['Viveve']\n",
|
|
|
|
|
|
"['Volt', 'Systems']\n",
|
|
|
|
|
|
"['OTHRSource']\n",
|
|
|
|
|
|
"['Viveve']\n",
|
|
|
|
|
|
"['Material', 'Control', 'Systems,', 'Inc.']\n",
|
|
|
|
|
|
"['Evolve', 'IP']\n",
|
|
|
|
|
|
"['Cook', 'Brothers', 'Bars']\n",
|
|
|
|
|
|
"['STOPit', 'Solutions']\n",
|
|
|
|
|
|
"['Broche', 'Ballet']\n",
|
|
|
|
|
|
"['Virtually', 'Human', 'Studio']\n",
|
|
|
|
|
|
"['BrightLine', 'Eating', 'Solutions', 'LLC']\n",
|
|
|
|
|
|
"['AgriWebb']\n",
|
|
|
|
|
|
"['Wingz,', 'Inc.']\n",
|
|
|
|
|
|
"['Noventiq', 'Holdings', 'plc']\n",
|
|
|
|
|
|
"['Corporate', 'Traveler', 'UK']\n",
|
|
|
|
|
|
"['Kadince']\n",
|
|
|
|
|
|
"['AndHealth']\n",
|
|
|
|
|
|
"['Swift', 'Medical']\n",
|
|
|
|
|
|
"['Agility', 'Inc.']\n",
|
|
|
|
|
|
"['ZyXel', 'Communications', 'Inc']\n",
|
|
|
|
|
|
"['UX', 'Design', 'Institute']\n",
|
|
|
|
|
|
"['STATCARE', 'URGENT', 'CARE']\n",
|
|
|
|
|
|
"['MaRS', 'Discovery', 'District']\n",
|
|
|
|
|
|
"['Altima', 'Dental', 'Canada']\n",
|
|
|
|
|
|
"['Aquent']\n",
|
|
|
|
|
|
"['FeverBee']\n",
|
|
|
|
|
|
"['Company', 'Nurse']\n",
|
|
|
|
|
|
"['DoubleVerify']\n",
|
|
|
|
|
|
"['Pepper', 'Content', 'Inc']\n",
|
|
|
|
|
|
"['NextHealth']\n",
|
|
|
|
|
|
"['Community', 'Transportation', 'Association', 'of', 'America']\n",
|
|
|
|
|
|
"['Teach', 'Plus']\n",
|
|
|
|
|
|
"['Winchester', 'Carlisle']\n",
|
|
|
|
|
|
"['Ceres', 'University']\n",
|
|
|
|
|
|
"['BrandActive', 'International', 'Inc.']\n",
|
|
|
|
|
|
"['Psychiatry-UK']\n",
|
|
|
|
|
|
"['Horsepower', 'Brands']\n",
|
|
|
|
|
|
"['Dept', 'Holding', 'B.V.']\n",
|
|
|
|
|
|
"['BioLife', 'Solutions,', 'Inc.']\n",
|
|
|
|
|
|
"['Walmart', 'Inc.']\n",
|
|
|
|
|
|
"['Conlon', 'Construction']\n",
|
|
|
|
|
|
"['Nana', 'Technologies', 'Inc.']\n",
|
|
|
|
|
|
"['Turo,', 'Inc.']\n",
|
|
|
|
|
|
"['BombBomb']\n",
|
|
|
|
|
|
"['The', 'Financial', 'Times', 'Limited']\n",
|
|
|
|
|
|
"['Southeastrans', 'Inc.']\n",
|
|
|
|
|
|
"['Type', 'A', 'Media']\n",
|
|
|
|
|
|
"['Jets', 'Gymnastics']\n",
|
|
|
|
|
|
"['Reconart']\n",
|
|
|
|
|
|
"['Excalibur', 'Data', 'Systems']\n",
|
|
|
|
|
|
"['Parkopoly']\n",
|
|
|
|
|
|
"['Strategos', 'International']\n",
|
|
|
|
|
|
"['Douglas', 'Elliman', 'Real', 'Estate']\n",
|
|
|
|
|
|
"['Oodrive']\n",
|
|
|
|
|
|
"['Swift', 'Medical']\n",
|
|
|
|
|
|
"['Canavan', 'Byrne']\n",
|
|
|
|
|
|
"['Dray', 'Alliance', 'Inc.']\n",
|
|
|
|
|
|
"['CanFi', 'Empowerment', 'Education', 'Corp']\n",
|
|
|
|
|
|
"['Axios', 'Media', 'Inc.']\n",
|
|
|
|
|
|
"['Volumental']\n",
|
|
|
|
|
|
"['Oceana,', 'Inc.']\n",
|
|
|
|
|
|
"['LaunchThat']\n",
|
|
|
|
|
|
"['UX', 'Design', 'Institute']\n",
|
|
|
|
|
|
"['Amyris']\n",
|
|
|
|
|
|
"['Shine', 'Bright', 'Care', 'LLC']\n",
|
|
|
|
|
|
"['Alston', 'Construction']\n",
|
|
|
|
|
|
"['Hello', 'Customer']\n",
|
|
|
|
|
|
"['S-Docs']\n",
|
|
|
|
|
|
"['Neil', 'Kelly', 'Company']\n",
|
|
|
|
|
|
"['Squaretrade', 'Inc.']\n",
|
|
|
|
|
|
"['Everyday', 'Software,', 'SL']\n",
|
|
|
|
|
|
"['Webdox', 'CLM']\n",
|
|
|
|
|
|
"['Hazlett', 'Tree', 'Service']\n",
|
|
|
|
|
|
"['High', 'Frequency']\n",
|
|
|
|
|
|
"['CloudRadial']\n",
|
|
|
|
|
|
"['Ideoclick']\n",
|
|
|
|
|
|
"['Equiem']\n",
|
|
|
|
|
|
"['Long', 'Beach', 'Community', 'College', 'District', '-', 'LA', 'SBDC']\n",
|
|
|
|
|
|
"['enChoice', 'Inc']\n",
|
|
|
|
|
|
"['Royal', 'Family', 'Kids,', 'Inc.']\n",
|
|
|
|
|
|
"['SilverCar']\n",
|
|
|
|
|
|
"['Anchor', 'QEA,', 'LLC']\n",
|
|
|
|
|
|
"['Maven', 'Clinic']\n",
|
|
|
|
|
|
"['Flight', 'Centre']\n",
|
|
|
|
|
|
"['Shoplogix', '(Asia', 'Pacific)', 'Limited', '(Formerly', 'Projection', 'HK)']\n",
|
|
|
|
|
|
"['North', 'Capital']\n",
|
|
|
|
|
|
"['Johnson', '&', 'Johnson', 'Surgical', 'Vision,', 'Inc.']\n",
|
|
|
|
|
|
"['Hexa', 'Assessments', 'dba', 'Talexes']\n",
|
|
|
|
|
|
"['Raketech', 'Group']\n",
|
|
|
|
|
|
"['EnsembleIQ']\n",
|
|
|
|
|
|
"['Ogi']\n",
|
|
|
|
|
|
"['Grammarly']\n",
|
|
|
|
|
|
"['PSC', 'Consulting']\n",
|
|
|
|
|
|
"['Trinity', 'College', 'London']\n",
|
|
|
|
|
|
"['Estes', 'Construction']\n",
|
|
|
|
|
|
"['FIVE', 'Holdings,', 'Inc']\n",
|
|
|
|
|
|
"[\"Women's\", 'Resource', 'Center']\n",
|
|
|
|
|
|
"['Pablo', '&', \"Rusty's\", 'Coffee']\n",
|
|
|
|
|
|
"['Lawyers', 'Committee', 'for', 'Civil', 'Rights', 'Under', 'Law']\n",
|
|
|
|
|
|
"['Richter10.2', 'Media', 'Group']\n",
|
|
|
|
|
|
"['Venair']\n",
|
|
|
|
|
|
"['SIT', 'International']\n",
|
|
|
|
|
|
"['Cultural', 'Care']\n",
|
|
|
|
|
|
"['DoubleVerify']\n",
|
|
|
|
|
|
"['Life', 'House']\n",
|
|
|
|
|
|
"['ASAPP']\n",
|
|
|
|
|
|
"['GLDN']\n",
|
|
|
|
|
|
"['Community', 'Fire', 'Prevention']\n",
|
|
|
|
|
|
"['Capillary', 'Technologies']\n",
|
|
|
|
|
|
"['Landis']\n",
|
|
|
|
|
|
"['Blacklane', 'GmbH']\n",
|
|
|
|
|
|
"['American', 'Concrete', 'Pipe', 'Association']\n",
|
|
|
|
|
|
"['The', 'CORE', 'Group']\n",
|
|
|
|
|
|
"['Market', 'Track', 'LLC', 'dba', 'Numerator']\n",
|
|
|
|
|
|
"['Medical', 'Practice', 'Management', 'Solutions']\n",
|
|
|
|
|
|
"['Recast', 'Software']\n",
|
|
|
|
|
|
"['Skan.ai']\n",
|
|
|
|
|
|
"['Smart', 'Choice']\n",
|
|
|
|
|
|
"['Hello', 'Insight']\n",
|
|
|
|
|
|
"['Agility', 'Robotics']\n",
|
|
|
|
|
|
"['American', 'State', 'Bank']\n",
|
|
|
|
|
|
"['Transifex', 'Opco', 'LLC']\n",
|
|
|
|
|
|
"['Cappfinity']\n",
|
|
|
|
|
|
"['Houwzer,', 'Inc']\n",
|
|
|
|
|
|
"['Feldman', 'Surveyors']\n",
|
|
|
|
|
|
"['CognisantMD']\n",
|
|
|
|
|
|
"['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n",
|
|
|
|
|
|
"['Wiliot']\n",
|
|
|
|
|
|
"['Jimmy', 'Brings']\n",
|
|
|
|
|
|
"['AIIM']\n",
|
|
|
|
|
|
"['Wound', 'Care', 'Advantage']\n",
|
|
|
|
|
|
"['One', 'Network', 'Enterprises']\n",
|
|
|
|
|
|
"['Harri', 'LLC']\n",
|
|
|
|
|
|
"['Community', 'Transportation', 'Association', 'of', 'America']\n",
|
|
|
|
|
|
"['Puls']\n",
|
|
|
|
|
|
"['Shopify', 'Inc.']\n",
|
|
|
|
|
|
"['Semantic', 'Web', 'Company', 'GmbH']\n",
|
|
|
|
|
|
"['EF', 'Educational', 'Foundation', 'for', 'Foreign', 'Study']\n",
|
|
|
|
|
|
"['Upland', 'Software', 'Inc.']\n",
|
|
|
|
|
|
"['Global', 'Systems', 'Engineering']\n",
|
|
|
|
|
|
"['G2.com,', 'Inc']\n",
|
|
|
|
|
|
"['Petroleum', 'Analyzer', 'Company']\n",
|
|
|
|
|
|
"['Quality', 'Built']\n",
|
|
|
|
|
|
"['Energy', 'Foundation']\n",
|
|
|
|
|
|
"['ManyChat,', 'Inc.']\n",
|
|
|
|
|
|
"['People', 'Untapped']\n",
|
|
|
|
|
|
"['Optimize', 'Wealth', 'Management']\n",
|
|
|
|
|
|
"['Perchwell']\n",
|
|
|
|
|
|
"['Rystad', 'Energy']\n",
|
|
|
|
|
|
"['DesktopShipper']\n",
|
|
|
|
|
|
"['New', 'England', 'Mechanical', 'Overlay', '(NEMO)']\n",
|
|
|
|
|
|
"['Scinapsis', 'Analytics', 'Inc.', 'd.b.a.', 'BenchSci']\n",
|
|
|
|
|
|
"['Eagle', 'Investment', 'Systems', 'LLC']\n",
|
|
|
|
|
|
"['Falkonry', 'Inc.']\n",
|
|
|
|
|
|
"['Exor', 'International', '(USA)']\n",
|
|
|
|
|
|
"['Outside', 'In']\n",
|
|
|
|
|
|
"['UX', 'Design', 'Institute']\n",
|
|
|
|
|
|
"['Studex', 'Corporation']\n",
|
|
|
|
|
|
"['Givergy']\n",
|
|
|
|
|
|
"['Porch']\n",
|
|
|
|
|
|
"['StatesServ/Hospicelink']\n",
|
|
|
|
|
|
"['The', 'a2', 'Milk', 'Company']\n",
|
|
|
|
|
|
"['Bungii,', 'LLC']\n",
|
|
|
|
|
|
"['My', 'Food', 'Bag']\n",
|
|
|
|
|
|
"['Data', 'Words']\n",
|
|
|
|
|
|
"['Dr.', 'First']\n",
|
|
|
|
|
|
"['Sidekick', 'Health']\n",
|
|
|
|
|
|
"['Sirma', 'AI', 'AD,', 'trading', 'as', 'Ontotext']\n",
|
|
|
|
|
|
"['CHEK', 'Institute']\n",
|
|
|
|
|
|
"['Universal', 'Technical', 'Resource', 'Services,', 'Inc.']\n",
|
|
|
|
|
|
"['Falkonry', 'Inc.']\n",
|
|
|
|
|
|
"['MOLITOR', 'Formations']\n",
|
|
|
|
|
|
"['Sword', 'Security']\n",
|
|
|
|
|
|
"['Missouri', 'State', 'Employees', 'Retirement', 'System']\n",
|
|
|
|
|
|
"['Pickup', 'Now', 'Inc']\n",
|
|
|
|
|
|
"['Papa', 'Technologies', 'LLC']\n",
|
|
|
|
|
|
"['Marble', 'Law', 'Firm']\n",
|
|
|
|
|
|
"['Parchment,', 'Inc.']\n",
|
|
|
|
|
|
"['Well', 'Health,', 'Inc.', 'dba', 'Artera']\n",
|
|
|
|
|
|
"['Skuid']\n",
|
|
|
|
|
|
"['Veriff', 'OÜ']\n",
|
|
|
|
|
|
"['Soham,', 'Inc.', '(Zenoti)']\n",
|
|
|
|
|
|
"['Caliza', 'DBA', 'Homesie/Landing']\n",
|
|
|
|
|
|
"['iAdvize', 'SAS']\n",
|
|
|
|
|
|
"['Milner', 'International', 'College', 'of', 'English']\n",
|
|
|
|
|
|
"['Adkins', 'Management', 'and', 'Consulting']\n",
|
|
|
|
|
|
"['Partender,', 'Inc.']\n",
|
|
|
|
|
|
"['Hometime']\n",
|
|
|
|
|
|
"['National', 'Wildlife', 'Federation']\n",
|
|
|
|
|
|
"['Petland', 'Kennesaw']\n",
|
|
|
|
|
|
"['Petland', 'Mall', 'of', 'Georgia']\n",
|
|
|
|
|
|
"['Lash', 'Affair', 'by', 'J.', 'Paris,', 'LLC']\n",
|
|
|
|
|
|
"['Freshworks,', 'Inc.']\n",
|
|
|
|
|
|
"['Petland', 'Summerville']\n",
|
|
|
|
|
|
"['FacilisGroup']\n",
|
|
|
|
|
|
"['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n",
|
|
|
|
|
|
"['Sweet', 'Cow', 'Ice', 'Cream']\n",
|
|
|
|
|
|
"['Kovo', 'Inc.']\n",
|
|
|
|
|
|
"['Anchor', 'Labs,', 'LLC']\n",
|
|
|
|
|
|
"['HINKLEY,', 'INC.']\n",
|
|
|
|
|
|
"['Solidus', 'Labs']\n",
|
|
|
|
|
|
"['Ogi']\n",
|
|
|
|
|
|
"['LTK']\n",
|
|
|
|
|
|
"['The', 'CX', 'Company']\n",
|
|
|
|
|
|
"['Wag', 'Labs,', 'Inc.']\n",
|
|
|
|
|
|
"['Paramount', 'Commerce']\n",
|
|
|
|
|
|
"['Akua', 'Mind', '&', 'Body']\n",
|
|
|
|
|
|
"['Anchain.ai']\n",
|
|
|
|
|
|
"['Pipedrive,', 'Inc.']\n",
|
|
|
|
|
|
"['Canavan', 'Byrne']\n",
|
|
|
|
|
|
"['Alston', 'Construction']\n",
|
|
|
|
|
|
"['Keystone', 'RV', 'Company']\n",
|
|
|
|
|
|
"['LJ', 'Hooker', 'Corporation', 'Pty', 'Ltd']\n",
|
|
|
|
|
|
"['Zenoti(Toni', '&', 'Guy)']\n",
|
|
|
|
|
|
"['National', 'Contracting', 'Center']\n",
|
|
|
|
|
|
"['Wild', 'Health']\n",
|
|
|
|
|
|
"['TripleSeat']\n",
|
|
|
|
|
|
"['Cloudticity']\n",
|
|
|
|
|
|
"['OvalEdge']\n",
|
|
|
|
|
|
"['ForceManager']\n",
|
|
|
|
|
|
"['Remind']\n",
|
|
|
|
|
|
"['LoadUp', 'Technologies,', 'LLC']\n",
|
|
|
|
|
|
"['Zenjob', 'GmbH']\n",
|
|
|
|
|
|
"['AgencyBloc']\n",
|
|
|
|
|
|
"['Serge']\n",
|
|
|
|
|
|
"['English', 'for', 'Asia']\n",
|
|
|
|
|
|
"['uConnect']\n",
|
|
|
|
|
|
"['Plan', 'To', 'Protect']\n",
|
|
|
|
|
|
"['Bonfire', 'Interactive', 'Ltd.']\n",
|
|
|
|
|
|
"['RogueHire,', 'LLC']\n",
|
|
|
|
|
|
"['Total', 'Dealer', 'Compliance']\n",
|
|
|
|
|
|
"['SalesForce,', 'Inc.']\n",
|
|
|
|
|
|
"['STT', 'Security', '&', 'Investigative', 'Services']\n",
|
|
|
|
|
|
"['Encore', 'Healthcare']\n",
|
|
|
|
|
|
"['Engel', '&', 'Völkers', 'Americas,', 'Inc']\n",
|
|
|
|
|
|
"['Herrmann']\n",
|
|
|
|
|
|
"['Cayuse']\n",
|
|
|
|
|
|
"['Ursa', 'Major', 'Technologies']\n",
|
|
|
|
|
|
"['The', 'Federal', 'Liberal', 'Agency', 'of', 'Canada']\n",
|
|
|
|
|
|
"['Company', 'Nurse']\n",
|
|
|
|
|
|
"['WeTravel']\n",
|
|
|
|
|
|
"['MarketScale']\n",
|
|
|
|
|
|
"['Mizuno', 'Usa,', 'Inc.']\n",
|
|
|
|
|
|
"['At', 'World', 'Properties,', 'LLC']\n",
|
|
|
|
|
|
"['Talener']\n",
|
|
|
|
|
|
"['Practo', 'PTE', 'Ltd.']\n",
|
|
|
|
|
|
"['Prenda']\n",
|
|
|
|
|
|
"['Arrcus']\n",
|
|
|
|
|
|
"['Yondr', 'USA', 'LLC']\n",
|
|
|
|
|
|
"['Scrivas']\n",
|
|
|
|
|
|
"['AutoServe1']\n",
|
|
|
|
|
|
"['InvestisDigital']\n",
|
|
|
|
|
|
"['Eldersource']\n",
|
|
|
|
|
|
"['Dialog']\n",
|
|
|
|
|
|
"['Own', 'Solutions', 'Financial', 'Services', 'Limited']\n",
|
|
|
|
|
|
"['Crayon']\n",
|
|
|
|
|
|
"['Slope', 'Software']\n",
|
|
|
|
|
|
"['Rokt', 'Pte', 'Ltd']\n",
|
|
|
|
|
|
"['CyberCube']\n",
|
|
|
|
|
|
"['Everlaw']\n",
|
|
|
|
|
|
"['Compass', 'RE']\n",
|
|
|
|
|
|
"['Solo']\n",
|
|
|
|
|
|
"['Dolly', 'Inc.']\n",
|
|
|
|
|
|
"['LTK']\n",
|
|
|
|
|
|
"['Access', 'E-Forms']\n",
|
|
|
|
|
|
"['Training', 'Concepts']\n",
|
|
|
|
|
|
"['Luma', 'Health,', 'Inc.']\n",
|
|
|
|
|
|
"['FRSecure']\n",
|
|
|
|
|
|
"['MedicalDirector']\n",
|
|
|
|
|
|
"nan\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"arr.index.astype(str)\n",
|
|
|
|
|
|
"words = arr.index.str.split()\n",
|
|
|
|
|
|
"for word in words:\n",
|
|
|
|
|
|
" print(word)"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 30,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"354\n",
|
|
|
|
|
|
"509\n",
|
|
|
|
|
|
"nan\n",
|
|
|
|
|
|
"<class 'str'>\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"acct_list = []\n",
|
|
|
|
|
|
"acct_list = arr.index.to_list()\n",
|
|
|
|
|
|
"acct_list2 = pages.index.to_list()\n",
|
|
|
|
|
|
"print(len(acct_list))\n",
|
|
|
|
|
|
"print(len(acct_list2))\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"for item2 in acct_list2:\n",
|
|
|
|
|
|
" acct_list.append(item2)\n",
|
2023-04-12 08:36:25 -04:00
|
|
|
|
"\n",
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"for item in acct_list:\n",
|
|
|
|
|
|
" if type(item) == float:\n",
|
|
|
|
|
|
" acct_list.remove(item)\n",
|
|
|
|
|
|
" item = str(item)\n",
|
|
|
|
|
|
" print(item)\n",
|
|
|
|
|
|
" print(type(item))\n",
|
|
|
|
|
|
" \n"
|
2023-04-12 08:36:25 -04:00
|
|
|
|
]
|
|
|
|
|
|
},
|
2023-04-13 17:05:51 -04:00
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"execution_count": 31,
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [
|
|
|
|
|
|
{
|
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
|
"text": [
|
|
|
|
|
|
"SilverCar Silvercar\n",
|
|
|
|
|
|
"Landis LandisU\n",
|
|
|
|
|
|
"AIIM AIIM+\n",
|
|
|
|
|
|
"DesktopShipper Desktop Shipper\n",
|
|
|
|
|
|
"TripleSeat Tripleseat\n",
|
|
|
|
|
|
"InvestisDigital Investis Digital\n",
|
|
|
|
|
|
"Bolt Volt\n",
|
|
|
|
|
|
"Landis LandisU\n",
|
|
|
|
|
|
"Ocean Oceana\n",
|
|
|
|
|
|
"Skuad Skool Skuid Skool\n",
|
|
|
|
|
|
"10\n"
|
|
|
|
|
|
]
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"source": [
|
|
|
|
|
|
"tup_list = []\n",
|
|
|
|
|
|
"for (name1, name2,) in itertools.combinations(acct_list, 2):\n",
|
|
|
|
|
|
" distance = lev(name1, name2)\n",
|
|
|
|
|
|
" if distance > 0 and distance < 2:\n",
|
|
|
|
|
|
" print(name1, name2)\n",
|
|
|
|
|
|
" tup_list.append((name1, name2))\n",
|
|
|
|
|
|
"\n",
|
|
|
|
|
|
"print(len(tup_list))"
|
|
|
|
|
|
]
|
|
|
|
|
|
},
|
|
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
"metadata": {},
|
2023-06-05 17:09:23 -04:00
|
|
|
|
"outputs": [],
|
2023-04-13 17:05:51 -04:00
|
|
|
|
"source": []
|
|
|
|
|
|
},
|
2023-04-12 08:36:25 -04:00
|
|
|
|
{
|
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
|
"metadata": {},
|
|
|
|
|
|
"outputs": [],
|
|
|
|
|
|
"source": []
|
|
|
|
|
|
}
|
|
|
|
|
|
],
|
|
|
|
|
|
"metadata": {
|
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
|
"display_name": "jupyter",
|
|
|
|
|
|
"language": "python",
|
|
|
|
|
|
"name": "python3"
|
|
|
|
|
|
},
|
|
|
|
|
|
"language_info": {
|
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
|
"version": 3
|
|
|
|
|
|
},
|
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
|
"name": "python",
|
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
|
"version": "3.11.1"
|
|
|
|
|
|
},
|
|
|
|
|
|
"orig_nbformat": 4
|
|
|
|
|
|
},
|
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
|
}
|