1005 lines
41 KiB
Plaintext
1005 lines
41 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd \n",
|
||
"import numpy as np \n",
|
||
"import seaborn as sns \n",
|
||
"import matplotlib.pylab as plt \n",
|
||
"import sketch\n",
|
||
"plt.style.use('ggplot')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pages = pd.read_csv(\"/Users/normrasmussen/Downloads/app_usage.csv\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(59433, 3)"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pages.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>School Name</th>\n",
|
||
" <th>Path</th>\n",
|
||
" <th>Mar 13 - Apr 11</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/courses</td>\n",
|
||
" <td>25</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/previews</td>\n",
|
||
" <td>16</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/account_links</td>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/analytics</td>\n",
|
||
" <td>8</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/settings/general/edit</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/apps/integrations</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/accounts/general/edit</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/people</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/settings/authentication</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>Vivvix University for Ad Intel</td>\n",
|
||
" <td>/styling/general/edit</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" School Name Path Mar 13 - Apr 11\n",
|
||
"0 Vivvix University for Ad Intel /courses 25\n",
|
||
"1 Vivvix University for Ad Intel /previews 16\n",
|
||
"2 Vivvix University for Ad Intel /account_links 11\n",
|
||
"3 Vivvix University for Ad Intel /analytics 8\n",
|
||
"4 Vivvix University for Ad Intel /settings/general/edit 7\n",
|
||
"5 Vivvix University for Ad Intel /apps/integrations 4\n",
|
||
"6 Vivvix University for Ad Intel /accounts/general/edit 4\n",
|
||
"7 Vivvix University for Ad Intel /people 3\n",
|
||
"8 Vivvix University for Ad Intel /settings/authentication 3\n",
|
||
"9 Vivvix University for Ad Intel /styling/general/edit 2"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pages.head(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pages_sum = pd.DataFrame(columns=['Total Views'])\n",
|
||
"pages_sum['Total Views'] = pages.groupby(by='School Name')['Mar 13 - Apr 11'].sum()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Total Views</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>School Name</th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Vivvix for Ad Intel</th>\n",
|
||
" <td>110</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>'@properties</th>\n",
|
||
" <td>322</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4 Pillars</th>\n",
|
||
" <td>112</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>AIIM+</th>\n",
|
||
" <td>528</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ASAPP</th>\n",
|
||
" <td>153</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>| AgencyBloc</th>\n",
|
||
" <td>3373</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Académie Évolupharma</th>\n",
|
||
" <td>253</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Access</th>\n",
|
||
" <td>11</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ActiveLearner</th>\n",
|
||
" <td>1907</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Ad Intel</th>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Total Views\n",
|
||
"School Name \n",
|
||
" Vivvix for Ad Intel 110\n",
|
||
"'@properties 322\n",
|
||
"4 Pillars 112\n",
|
||
"AIIM+ 528\n",
|
||
"ASAPP 153\n",
|
||
" | AgencyBloc 3373\n",
|
||
"Académie Évolupharma 253\n",
|
||
"Access 11\n",
|
||
"ActiveLearner 1907\n",
|
||
"Ad Intel 2"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"to_remove = ['Academy', 'academy', 'University', 'university', 'school', 'School', 'Sandbox', 'Knowledge Base']\n",
|
||
"for item in pages_sum.itertuples(index=True):\n",
|
||
" for word in to_remove:\n",
|
||
" if word in item[0]:\n",
|
||
" less = item[0].replace(word, '')\n",
|
||
" pages_sum.rename(index={item[0]:less}, inplace=True)\n",
|
||
"\n",
|
||
"pages_sum.head(10)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pages_sum.index = pages_sum.index.str.strip()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'Total Views': {'Vivvix for Ad Intel': 110, \"'@properties\": 322, '4 Pillars': 112, 'AIIM+': 528, 'ASAPP': 153, '| AgencyBloc': 3373, 'Académie Évolupharma': 253, 'Access': 11, 'ActiveLearner': 1907, 'Ad Intel': 765, 'AdCellerant': 5, 'Adkins': 107, 'Agility': 156, 'AgriWebb': 567, 'AgriWebb Brazil': 333, 'Akua Mind Body': 578, 'AlphaSense': 17, 'Altenew': 859, 'Altima Dental': 33, 'American Concrete Pipe Association Learning Center': 2864, 'AnChain.AI': 437, 'AndHealth's': 2628, 'Andhealth Internal Training': 17, 'Ansley': 116, 'Anthology': 13, 'Anthology Internal': 104, 'Anthology Trial': 418, 'Aquent's Learning Lab': 16, 'Arrcus': 8, 'Artera': 54, 'Ascend Training by Stenograph': 9, 'AtlasIED': 125, 'Atlassian': 41, 'AutoServe1': 4, 'Autoserve1 New': 1, 'BCNDP Caucus': 26, 'BLVD Mentor Learning Library': 223, 'BNY Mellon OMNI Digital': 364, 'Bartosz's': 3, 'Bellhop': 118, 'Belmont Farm Nursery': 29, 'BenchSci': 1659, 'Big Ideas Learning': 167, 'Big Ideas Learning Sandbox': 7, 'BirchStreet': 11, 'BirchStreet Trial': 446, 'Bisla': 402, 'Blacklane Chauffeur Learning': 263, 'Blacklane Dubai Chauffeurs': 272, 'Blacklane Learning & Development': 1913, 'Blackstone Industrial': 42, 'Bolt': 1177, 'BombBomb Studios': 673, 'Bonfire': 19, 'Boon Edam': 164, 'Brand Addition's': 457, 'Brian's Starfleet': 3, 'Brighton Science': 1703, 'Bungii Operations': 155, 'CATalyst': 161, 'CHEK Institute': 11138, 'CIBC FirstCaribbean Online and Mobile Banking': 5, 'CIRE': 149, 'CMIC': 200, 'CONCEPT ACADEMY': 468, 'CORE College': 1419, 'CRM Training Courses and Tutorials | Pipedrive Learn': 984, 'CSNews Technology': 1, 'CTAA's': 1947, 'Canavan Byrne': 1380, 'Capillary': 1473, 'Cappfinity': 108, 'Capstone Scholars Program': 456, 'Cayuse': 99, 'Censis': 1300, 'Channable Internal': 551, 'Channable': 40, 'Chann': 152, 'Chico Electric': 135, 'Cholodecki current': 1, 'Christian': 5, 'Christie's International Real Estate pl@tform': 273, 'ChurnZero': 41, 'Clay Clerk': 525, 'CloudRadial': 455, 
'Cloudticity': 27, 'Cluid Housing': 65, 'Coastal Consulting': 114, 'Cofense': 129, 'Community Fire Prevention': 37, 'Compass': 4327, 'Compass U': 265, 'Competitive Advantage': 48, 'Con-Trol Training System': 816, 'Con-Trol Training System - BambooHR': 179, 'Conlon's': 598, 'Cook Brother's Bars': 361, 'Corporate Traveler': 3, 'Curso de Formação': 93, 'CurveBeam': 87, 'DELETE - Airbnb Host Hub': 12, 'DELETE - Airbnb Photography': 13, 'DELETE - BLE Dev': 14, 'DELETE - Bright Line Hub': 24, 'DELETE - Bright Liners Hub': 1020, 'DELETE - Dealerware': 14, 'DELETE - Dynamic Yield': 14, 'DELETE - Gett': 13, 'DELETE - H1 Insights Customer': 21, 'DELETE - H1 Insights Inc': 47, 'DELETE - Happy Culture': 15, 'DELETE - Numerator for Promotions Intel': 15, 'DELETE - OTHRSource': 16, 'DELETE - PeopleDev': 14, 'DELETE - Pixability's': 1, 'DELETE - SANDBOX Bright Liners Hub': 28, 'DELETE - The GLAMSQUAD Online Learning Center': 16, 'DELETE - Uber Compliance El Salvador': 14, 'DELETE - Uber Ecuador': 13, 'DELETE - Uber Peru': 19, 'DELETE - Uber Peru Compliance': 14, 'DELETE - Yotpo': 13, 'DV Learning': 601, 'Datawords': 221, 'Dell AMD Learning': 1192, 'Dept Agency': 575, 'Desktop Shipper': 875, 'Devils': 9, 'Dolly': 23, 'DoorDash Shop & Deliver': 10, 'DoorDash's': 592, 'DoubleVerify': 89, 'Douglas Elliman MyLearning.Elliman.com': 943, 'Doximity Compliance Learning': 27, 'Doximity': 183, 'Dray Alliance': 207, 'EASE': 373, 'EF High Exchange Year: IEC': 1703, 'EF High Exchange Year: Staff': 6, 'EF host family resource center': 29, 'EF student learning center': 74, 'ERA Key Realty Services': 66, 'ES&E': 329, 'EXOR & CORVINA': 189, 'ElderSource Ecommerce': 102, 'ElderSource Internal': 16, 'ElderSource Partner': 4, 'Election Protection': 39, 'Embee': 4, 'Encore Healthcare': 290, 'Energy Foundation': 490, 'Engel & Völkers': 566, 'English for Asia': 2270, 'Equip By Equiem': 143, 'Essentium Training Center': 11, 'Everlaw Certification Center': 33, 'Evolve IP': 426, 'Excalibur Data Systems 
Online Learning': 123, 'FBD Frozen': 27, 'FCA': 1061, 'FCTG Americas': 3474, 'FRSecure': 407, 'Facilis': 308, 'Factorial': 1593, 'Feldman Geospatial Learning Management System': 289, 'FeverBee's Professional Community Management Courses': 11, 'Figma': 1, 'First Mile's': 279, 'Flink': 1472, 'Foleon': 1776, 'Football Marketing Asia': 12, 'For The Children': 319, 'Force': 3, 'Formation Planifions pour protégerMC': 46, 'Formation de l'Équipe Trudeau': 50, 'Founder Portal': 3, 'Freshworks': 259, 'Front': 975, 'Futuri': 152, 'G2's': 293, 'GLAS Agency': 4, 'Garrett Virtual': 376, 'GenTech Associates': 4599, 'GetPixel': 1969, 'Givergy': 146, 'Glassdoor': 1112, 'Global Systems Engineering': 25, 'GoGuardian': 94, 'GomezLee Marketing': 3, 'Grammarly': 15, 'Grow WELL': 907, 'HARDSKILLS': 2338, 'HARDSKILLS-DEMO': 442, 'HELPme': 163, 'HackerRank': 164, 'Hanna': 1440, 'Harri': 609, 'Harri Employee': 8, 'Harri's for McDonalds': 2482, 'Harri's Client Onboarding (IPC)': 18, 'Harri's Journey to Success': 412, 'HealthFirst Training': 286, 'Healthiverse': 2492, 'Hello Insight': 1, 'Herrmann Asia Learning Portal': 1681, 'Hinkley': 535, 'Hometime Hub': 184, 'HopSkipDrive Resources': 780, 'Hornbill': 2, 'Horsepower Brands': 2540, 'Houwzer': 7, 'Hubert dykiel's': 8, 'Humly': 656, 'Hunt Real Estate': 839, 'ISOutsource': 125, 'IceMalta': 148, 'Internal HackerRank': 119, 'Investis Digital': 188, 'Jets Gymnastics': 369, 'Jimmy Brings': 506, 'Jonny davies's': 3, 'Kadince': 203, 'Keystone RV Company': 114, 'Knox': 90, 'Kovo': 5, 'LJ Hooker': 48, 'LTK': 563, 'Landis Customer': 10, 'Landis Internal': 26, 'Landis': 30, 'LandisU': 30, 'Lash Affair': 56, 'LaunchThat': 263, 'Lawpath': 193, 'Learn.kiwi.com - Learning and Development': 623, 'Learning Lab': 99, 'Levi green's': 7, 'Library': 69, 'Life House': 109, 'Light Brigade': 302, 'Lighthouse by LJ Hooker Group': 1378, 'Lightspeed': 4, 'LiquidSpace's': 2, 'Little Harvard': 1, 'LoadUp': 183, 'Lukasz cholodecki's': 6, 'Luma Health U': 1, 'Luma U': 
3459, 'Luminate': 1, 'Luvo': 594, 'MANTA': 13, 'MANTA Hub': 6, 'MASSÉNA Formations': 4009, 'MC Personalization eCampus': 977, 'MaRS Discovery District': 463, 'Magis Center': 2136, 'Malta International Airport | Northpass eLearning Hub': 1084, 'Marble Law': 538, 'Mark43 Institute': 152, 'Mark43': 323, 'MarketScale Creator Community': 499, 'Marketing Pony': 1, 'Marketscale Director's Chair': 20, 'Marta's': 133, 'Matt cochran's': 62, 'Mattr': 31, 'Maven Care Team': 1579, 'Maven Clinic's': 92, 'MedicalDirector's': 62, 'Meditech': 8, 'MicroFour': 173, 'Minga': 39, 'Missouri State Employees Retirement System': 1, 'Mizuno': 895, 'Mizuno Running': 195, 'Motrain': 427, 'Murphy Geospatial Northpass Account': 3, 'My Food Bag': 49, 'My Meeting Courses': 767, 'MyAuPairCourses': 103, 'MyEquip By Equiem': 352, 'NCC's': 15, 'NCC's Medicare Sales': 518, 'NEMO Safety Training': 255, 'Nana': 270, 'Napa Center': 110, 'Neil Kelly': 256, 'Nest U': 434, 'Netradyne': 1485, 'Network First': 1738, 'Network Q': 867, 'Next Theme Test': 7, 'NextIQ': 122, 'North Capital': 7, 'Northpass Catalog': 28, 'Northpass Product Training': 37, 'Northpass for Employees': 65, 'Noventiq': 86, 'Ocean': 245, 'Oceana': 26, 'Ogi': 156, 'Oiltanking Training': 847, 'Omnisend': 123, 'OnDemand by enChoice': 7, 'Onclusive': 230, 'Ontotext': 10, 'Oodrive': 1590, 'OpenSesame': 7, 'Optimize': 24, 'Ornikar': 408, 'Otoqi': 397, 'Outside In': 909, 'OvalEdge': 213, 'Own Solutions and Aplauz': 223, 'PAC': 17, 'PAC -Team Learning': 2, 'PCTEL': 720, 'PICKUP HR': 19, 'PICKUP SOP Instance': 229, 'PICKUP': 14, 'PITM': 147, 'PSC's Professional Development Platform': 198, 'Pablo & Rusty's Online Education': 59, 'Pal': 179, 'Papa Internal': 2489, 'Parchment Learn': 394, 'Parting Stone': 3018, 'Path Onboarding': 1040, 'PebblePad': 172, 'People Untapped Online Learning Centre': 697, 'Pepper Content': 344, 'Perchwell': 42, 'Petal Solutions Inc.': 424, 'Phoy phen's': 2, 'Pipedrive': 236, 'Pipedrive Sandbox': 70, 'Pipedrive Learn': 5, 
'Plan to Protect® On-Line Training': 1153, 'Plan to Protect® dwell Abuse Awareness Training (in partnership with New Apostolic Church Canada)': 14, 'PoolParty': 52, 'Porch': 140, 'Prenda': 818, 'Prism Maritime': 61, 'Procuro': 387, 'Psynapse Education': 32, 'Puls Tech Resource Center': 873, 'Puls Technician Resource Center': 1, 'Quality Built': 226, 'Raptor': 119, 'Real Estate': 1411, 'Recast': 1200, 'ReconArt Studio': 195, 'Red Lion Controls'': 258, 'Remind Learning Center': 11, 'Remind': 3, 'Renaissance': 25, 'Renaissance U': 53, 'Riviera Partners': 66, 'Rob's': 3, 'Roboflow': 15, 'Robot Bootcamp (Internal)': 433, 'Roosted': 233, 'Rootstrap': 1759, 'Ryan agnello's': 83, 'Rystad Energy Employee Training Center': 84, 'Rystad Energy': 179, 'Rystad Energy's Learning Center': 3059, 'S-Docs': 31, 'SIT Online': 140, 'STOPit SEL, Safety, & Wellness Training': 389, 'STOPit Solutions': 185, 'STT Security': 247, 'STUDEX Germany': 4, 'STUDEX Mexico': 3, 'STUDEX Poland': 4, 'STUDEX USA': 517, 'STUDEX United Kingdom': 289, 'Safic'': 105, 'Scenario': 958, 'of Rokt': 410, 'ScreenBeam Learning': 7, 'ScreenBeam's Global': 6, 'Scrivas': 133, 'Securonix': 1014, 'Securonix Internal': 12, 'Securonix Partner Program': 351, 'Serge': 1369, 'Service Coordination Unlimited': 325, 'Shine Bright Care Training': 37, 'ShipEx's': 248, 'Shipt Passport': 1034, 'Shopify Certification': 30, 'Shopify GO': 6, 'Shopify Ignite': 12, 'Shopify Partner': 1, 'Showdigs': 1137, 'Sidekick': 249, 'Silvercar': 29, 'SimplrFlex': 97, 'Skan': 263, 'Skin Laundry': 52, 'Skuad Skool': 461, 'Skuid Customer': 3, 'Skuid Skool': 674, 'Smart Start Training Modules': 6, 'SmartPM': 200, 'Solo Certified': 54, 'Solo Internal Training': 399, 'Spark Driver Resources Hub': 1093, 'SpedTrack': 197, 'Splash': 2, 'Spotlight': 1798, 'Spring Point': 249, 'SquareTrade GO': 99, 'Staff Courses': 21, 'StateServ Training': 2103, 'Stenograph': 3416, 'Stenograph Education Network': 16, 'Strategos': 239, 'SubItUp': 13, 'Swift': 843, 'Swift 
Medical': 57, 'Sword Security': 311, 'TONI&GUY LEARNING HUB': 1066, 'TZA': 47, 'Talent Finder Resource Hub': 53, 'Talkspace': 240, 'Talkspace - Northpass': 1031, 'Talkspace National Practice Provider Training': 46, 'Teach Plus': 142, 'Team Trudeau Training': 23, 'Terminus': 77, 'The CX': 874, 'The Heico Companies': 4, 'The Knowledge | MATTR': 1225, 'The Latimer Group': 92, 'The Recruiter': 1276, 'Tidel's': 848, 'Tomasz król's': 100, 'Toni&Guy MVP': 13, 'Toni&Guy Passport': 7, 'Tonkean': 11, 'Total Dealer Compliance': 110, 'Trackforce Valiant': 146, 'Training Center': 3205, 'Training Concepts': 842, 'Trexity': 168, 'Trinity College London: Trinity Teach': 19, 'Tripleseat': 274, 'Turo': 27, 'Turo Learning Hub': 95, 'Turo On-Call': 11, 'UTRS Learning Management System': 111, 'UX Design - Learning Paths': 1, 'UX Design Institute': 3583, 'Universal': 1345, 'Upland CXM': 28, 'Upland Support': 1572, 'Upland': 1138, 'Uptick': 235, 'Ursa Major Training': 1071, 'Venair': 1095, 'Veriff's': 78, 'Ververica': 119, 'Vision Excellence Institute': 2, 'Viva Wallet': 2365, 'Voigt Smith Innovation': 1, 'Volt': 408, 'Volt-staging': 18, 'Volumental': 912, 'WELL Health Partners': 25, 'Wag!': 15, 'Walmart Luminate's': 482, 'Walmart Luminate's (MIGRATED TO AZURE)': 21, 'Walmart': 29, 'WeTravel Courses': 111, 'Webdox': 78, 'Whitetail Properties Education Platform': 3604, 'Wild Health': 756, 'Wild Health Education Portal': 620, 'Wiliot': 22, 'Will kozinski's': 31, 'Williams Sonoma Trial': 9, 'Winchester Carlisle': 213, 'Wingz': 247, 'Women's Resource Center': 7, 'Yakademy': 1, 'Your Pathway Home': 3, 'ZENOTI': 42, 'ZU EWC': 14, 'ZU Sorbet': 8, 'Zenjob': 406, 'Zenoti Employee Training portal': 306, 'Zenoti QA': 70, 'Zenoti': 3639, 'Zion Engagement and Planning': 5, '[Sandbox] Charlie's': 15, '[Sandbox] Kaitlyn Folsom's': 2, 'beqom': 317, 'd.': 170, 'dbaPlatform': 82, 'iAdvize': 241, 'iConnect Training': 182, 'i': 467, 'iorad': 12, 'my LCC Courses!': 1410, 'occy': 207, 'uConnect': 133, 'Łukasz 
Wnęk - production': 29}}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"test_dict = pages_sum.to_dict()\n",
|
||
"print(test_dict)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Twotal</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>School Name</th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>'@properties</th>\n",
|
||
" <td>322</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4 Pillars</th>\n",
|
||
" <td>112</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>AIIM+</th>\n",
|
||
" <td>528</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ASAPP</th>\n",
|
||
" <td>153</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Académie Évolupharma</th>\n",
|
||
" <td>253</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Yakademy</th>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Your Pathway Home</th>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ZENOTI</th>\n",
|
||
" <td>42</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ZU EWC</th>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>ZU Sorbet</th>\n",
|
||
" <td>8</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>489 rows × 1 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Twotal\n",
|
||
"School Name \n",
|
||
"'@properties 322\n",
|
||
"4 Pillars 112\n",
|
||
"AIIM+ 528\n",
|
||
"ASAPP 153\n",
|
||
"Académie Évolupharma 253\n",
|
||
"... ...\n",
|
||
"Yakademy 1\n",
|
||
"Your Pathway Home 3\n",
|
||
"ZENOTI 42\n",
|
||
"ZU EWC 14\n",
|
||
"ZU Sorbet 8\n",
|
||
"\n",
|
||
"[489 rows x 1 columns]"
|
||
]
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pages_three = pd.DataFrame(columns=['Twotal'])\n",
|
||
"pages_three['Twotal'] = pages_sum.groupby(['School Name'])['Total Views'].sum()\n",
|
||
"pages_three.head(-20)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pages_three.loc[pages_three.index == 'Zenjob']\n",
|
||
"acct_list = pages_three.index.to_list()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Bolt Volt\n",
|
||
"Landis LandisU\n",
|
||
"Ocean Oceana\n",
|
||
"Skuad Skool Skuid Skool\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from Levenshtein import distance as lev\n",
|
||
"import itertools\n",
|
||
"\n",
|
||
"acct1 = []\n",
|
||
"comp = []\n",
|
||
"for (\n",
|
||
" name1,\n",
|
||
" name2,\n",
|
||
") in itertools.combinations(acct_list, 2):\n",
|
||
" # print(name1, name2) - prints all pairs, working so far.\n",
|
||
" distance = lev(name1, name2)\n",
|
||
" # print(distance) - successfully returns numbers\n",
|
||
" if distance > 0 and distance < 2:\n",
|
||
" print(name1, name2)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Stored 'pages' (DataFrame)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"pages = pages_three\n",
|
||
"%store pages"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"arr = pd.read_csv(\"/Users/normrasmussen/Downloads/cust_arr.csv\", index_col=0, header=[0])\n",
|
||
"arr.fillna(0, inplace=True)\n",
|
||
"arr.replace(',','', regex=True, inplace=True)\n",
|
||
"arr['ARR'] = arr['ARR'].astype(int)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"arr = arr.drop(axis=1, columns=['Renewaldate Date', 'CSM'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(354, 1)"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"arr.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"['Stuart']\n",
|
||
"['HelloTech', 'Inc.']\n",
|
||
"['Shopify', 'Inc.']\n",
|
||
"['Hardskills', 'Pte.', 'Ltd']\n",
|
||
"['Shipt']\n",
|
||
"['SPS', 'Commerce']\n",
|
||
"['CDP']\n",
|
||
"['Altenew']\n",
|
||
"['BrightMove']\n",
|
||
"['SITA', 'B.V.', 'c/o', 'Malta', 'International', 'Airport']\n",
|
||
"['Football', 'Marketing', 'Asia']\n",
|
||
"['Prism', 'Maritime']\n",
|
||
"['Skuid']\n",
|
||
"['SmartPM', 'Technologies']\n",
|
||
"['DoorDash,', 'Inc.']\n",
|
||
"['International', 'Care', 'Ltd.']\n",
|
||
"['BrightMove']\n",
|
||
"['Epiphany', 'Healthcare']\n",
|
||
"['Artsy']\n",
|
||
"['Évolupharma,', 'Inc.']\n",
|
||
"['UAB', 'Omnisend']\n",
|
||
"['Epiphany', 'Healthcare']\n",
|
||
"['Artsy']\n",
|
||
"['Renu', 'Contracting', 'Restoration']\n",
|
||
"['Minga']\n",
|
||
"['Renu', 'Contracting', 'Restoration']\n",
|
||
"['Viva', 'Online', 'Services', 'SA']\n",
|
||
"['Health', 'Current']\n",
|
||
"['Cherokee', 'Enterprises,', 'Inc.']\n",
|
||
"['Kinetica']\n",
|
||
"['Innovaccer', 'Inc.']\n",
|
||
"['Bellhop,', 'Inc']\n",
|
||
"['ISOutsource']\n",
|
||
"['Safic-Alcan']\n",
|
||
"['PetalMD']\n",
|
||
"['GomezLee', 'Marketing']\n",
|
||
"['PSC', 'Consulting']\n",
|
||
"['STATCARE', 'URGENT', 'CARE']\n",
|
||
"['Tru', 'Realty']\n",
|
||
"['Netradyne']\n",
|
||
"['H1', 'Insights,', 'Inc.']\n",
|
||
"['Hardskills', 'Pte.', 'Ltd']\n",
|
||
"['BambooHR']\n",
|
||
"['Velocity', 'Risk', 'Underwriters,', 'LLC']\n",
|
||
"['Harri', 'LLC']\n",
|
||
"['Rootstrap']\n",
|
||
"['First', 'Mile']\n",
|
||
"['Lawpath']\n",
|
||
"['Brand', 'Addition']\n",
|
||
"['Roboflow']\n",
|
||
"['Oiltanking', 'GmbH']\n",
|
||
"['Safari', 'Energy']\n",
|
||
"['Larson', 'Texts']\n",
|
||
"['Spring', 'Point']\n",
|
||
"['Legislative', 'Assembly', 'of', 'British', 'Columbia']\n",
|
||
"['National', 'Campus', 'and', 'Community', 'Radio', 'Association']\n",
|
||
"['Psynapse', 'Psychometrics', 'Pty', 'Ltd']\n",
|
||
"['Portnoy,', 'Messinger,', 'Pearl', '&', 'Associates,', 'Inc.']\n",
|
||
"['Trax', 'Technology', 'Solutions', 'Pte', 'Ltd']\n",
|
||
"['Uber', 'London', 'Limited', '(POOL)']\n",
|
||
"['Ansan', 'Industries', 'Ltd.']\n",
|
||
"['connectRN', 'Inc.']\n",
|
||
"['Usdan', 'Summer', 'Camp', 'for', 'the', 'Arts']\n",
|
||
"['Carlson', 'Capital', 'Management']\n",
|
||
"['Safari', 'Energy']\n",
|
||
"['Surmesur']\n",
|
||
"['connectRN', 'Inc.']\n",
|
||
"['ManyChat,', 'Inc.']\n",
|
||
"['Service', 'Coordination', 'Unlimited']\n",
|
||
"['Doximity', 'Inc.']\n",
|
||
"['Starwood', 'Pet', 'Travel']\n",
|
||
"['Future', 'Point', 'of', 'View', '(Tri-Corps)']\n",
|
||
"['Form', 'Energy']\n",
|
||
"['Channable']\n",
|
||
"['Hammer', 'Nutrition']\n",
|
||
"['Uber', 'B.V.', '-', 'Uber', 'LatAm', 'Compliance']\n",
|
||
"['Securonix', 'Inc.']\n",
|
||
"['HopSkipDrive']\n",
|
||
"['Kiwi']\n",
|
||
"['People', 'Untapped']\n",
|
||
"['beqom']\n",
|
||
"['Casio', 'America', 'Inc']\n",
|
||
"['Riviera', 'Partners', 'LLC']\n",
|
||
"['The', 'Latimer', 'Group']\n",
|
||
"['Viveve']\n",
|
||
"['Volt', 'Systems']\n",
|
||
"['OTHRSource']\n",
|
||
"['Viveve']\n",
|
||
"['Material', 'Control', 'Systems,', 'Inc.']\n",
|
||
"['Evolve', 'IP']\n",
|
||
"['Cook', 'Brothers', 'Bars']\n",
|
||
"['STOPit', 'Solutions']\n",
|
||
"['Broche', 'Ballet']\n",
|
||
"['Virtually', 'Human', 'Studio']\n",
|
||
"['BrightLine', 'Eating', 'Solutions', 'LLC']\n",
|
||
"['AgriWebb']\n",
|
||
"['Wingz,', 'Inc.']\n",
|
||
"['Noventiq', 'Holdings', 'plc']\n",
|
||
"['Corporate', 'Traveler', 'UK']\n",
|
||
"['Kadince']\n",
|
||
"['AndHealth']\n",
|
||
"['Swift', 'Medical']\n",
|
||
"['Agility', 'Inc.']\n",
|
||
"['ZyXel', 'Communications', 'Inc']\n",
|
||
"['UX', 'Design', 'Institute']\n",
|
||
"['STATCARE', 'URGENT', 'CARE']\n",
|
||
"['MaRS', 'Discovery', 'District']\n",
|
||
"['Altima', 'Dental', 'Canada']\n",
|
||
"['Aquent']\n",
|
||
"['FeverBee']\n",
|
||
"['Company', 'Nurse']\n",
|
||
"['DoubleVerify']\n",
|
||
"['Pepper', 'Content', 'Inc']\n",
|
||
"['NextHealth']\n",
|
||
"['Community', 'Transportation', 'Association', 'of', 'America']\n",
|
||
"['Teach', 'Plus']\n",
|
||
"['Winchester', 'Carlisle']\n",
|
||
"['Ceres', 'University']\n",
|
||
"['BrandActive', 'International', 'Inc.']\n",
|
||
"['Psychiatry-UK']\n",
|
||
"['Horsepower', 'Brands']\n",
|
||
"['Dept', 'Holding', 'B.V.']\n",
|
||
"['BioLife', 'Solutions,', 'Inc.']\n",
|
||
"['Walmart', 'Inc.']\n",
|
||
"['Conlon', 'Construction']\n",
|
||
"['Nana', 'Technologies', 'Inc.']\n",
|
||
"['Turo,', 'Inc.']\n",
|
||
"['BombBomb']\n",
|
||
"['The', 'Financial', 'Times', 'Limited']\n",
|
||
"['Southeastrans', 'Inc.']\n",
|
||
"['Type', 'A', 'Media']\n",
|
||
"['Jets', 'Gymnastics']\n",
|
||
"['Reconart']\n",
|
||
"['Excalibur', 'Data', 'Systems']\n",
|
||
"['Parkopoly']\n",
|
||
"['Strategos', 'International']\n",
|
||
"['Douglas', 'Elliman', 'Real', 'Estate']\n",
|
||
"['Oodrive']\n",
|
||
"['Swift', 'Medical']\n",
|
||
"['Canavan', 'Byrne']\n",
|
||
"['Dray', 'Alliance', 'Inc.']\n",
|
||
"['CanFi', 'Empowerment', 'Education', 'Corp']\n",
|
||
"['Axios', 'Media', 'Inc.']\n",
|
||
"['Volumental']\n",
|
||
"['Oceana,', 'Inc.']\n",
|
||
"['LaunchThat']\n",
|
||
"['UX', 'Design', 'Institute']\n",
|
||
"['Amyris']\n",
|
||
"['Shine', 'Bright', 'Care', 'LLC']\n",
|
||
"['Alston', 'Construction']\n",
|
||
"['Hello', 'Customer']\n",
|
||
"['S-Docs']\n",
|
||
"['Neil', 'Kelly', 'Company']\n",
|
||
"['Squaretrade', 'Inc.']\n",
|
||
"['Everyday', 'Software,', 'SL']\n",
|
||
"['Webdox', 'CLM']\n",
|
||
"['Hazlett', 'Tree', 'Service']\n",
|
||
"['High', 'Frequency']\n",
|
||
"['CloudRadial']\n",
|
||
"['Ideoclick']\n",
|
||
"['Equiem']\n",
|
||
"['Long', 'Beach', 'Community', 'College', 'District', '-', 'LA', 'SBDC']\n",
|
||
"['enChoice', 'Inc']\n",
|
||
"['Royal', 'Family', 'Kids,', 'Inc.']\n",
|
||
"['SilverCar']\n",
|
||
"['Anchor', 'QEA,', 'LLC']\n",
|
||
"['Maven', 'Clinic']\n",
|
||
"['Flight', 'Centre']\n",
|
||
"['Shoplogix', '(Asia', 'Pacific)', 'Limited', '(Formerly', 'Projection', 'HK)']\n",
|
||
"['North', 'Capital']\n",
|
||
"['Johnson', '&', 'Johnson', 'Surgical', 'Vision,', 'Inc.']\n",
|
||
"['Hexa', 'Assessments', 'dba', 'Talexes']\n",
|
||
"['Raketech', 'Group']\n",
|
||
"['EnsembleIQ']\n",
|
||
"['Ogi']\n",
|
||
"['Grammarly']\n",
|
||
"['PSC', 'Consulting']\n",
|
||
"['Trinity', 'College', 'London']\n",
|
||
"['Estes', 'Construction']\n",
|
||
"['FIVE', 'Holdings,', 'Inc']\n",
|
||
"[\"Women's\", 'Resource', 'Center']\n",
|
||
"['Pablo', '&', \"Rusty's\", 'Coffee']\n",
|
||
"['Lawyers', 'Committee', 'for', 'Civil', 'Rights', 'Under', 'Law']\n",
|
||
"['Richter10.2', 'Media', 'Group']\n",
|
||
"['Venair']\n",
|
||
"['SIT', 'International']\n",
|
||
"['Cultural', 'Care']\n",
|
||
"['DoubleVerify']\n",
|
||
"['Life', 'House']\n",
|
||
"['ASAPP']\n",
|
||
"['GLDN']\n",
|
||
"['Community', 'Fire', 'Prevention']\n",
|
||
"['Capillary', 'Technologies']\n",
|
||
"['Landis']\n",
|
||
"['Blacklane', 'GmbH']\n",
|
||
"['American', 'Concrete', 'Pipe', 'Association']\n",
|
||
"['The', 'CORE', 'Group']\n",
|
||
"['Market', 'Track', 'LLC', 'dba', 'Numerator']\n",
|
||
"['Medical', 'Practice', 'Management', 'Solutions']\n",
|
||
"['Recast', 'Software']\n",
|
||
"['Skan.ai']\n",
|
||
"['Smart', 'Choice']\n",
|
||
"['Hello', 'Insight']\n",
|
||
"['Agility', 'Robotics']\n",
|
||
"['American', 'State', 'Bank']\n",
|
||
"['Transifex', 'Opco', 'LLC']\n",
|
||
"['Cappfinity']\n",
|
||
"['Houwzer,', 'Inc']\n",
|
||
"['Feldman', 'Surveyors']\n",
|
||
"['CognisantMD']\n",
|
||
"['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n",
|
||
"['Wiliot']\n",
|
||
"['Jimmy', 'Brings']\n",
|
||
"['AIIM']\n",
|
||
"['Wound', 'Care', 'Advantage']\n",
|
||
"['One', 'Network', 'Enterprises']\n",
|
||
"['Harri', 'LLC']\n",
|
||
"['Community', 'Transportation', 'Association', 'of', 'America']\n",
|
||
"['Puls']\n",
|
||
"['Shopify', 'Inc.']\n",
|
||
"['Semantic', 'Web', 'Company', 'GmbH']\n",
|
||
"['EF', 'Educational', 'Foundation', 'for', 'Foreign', 'Study']\n",
|
||
"['Upland', 'Software', 'Inc.']\n",
|
||
"['Global', 'Systems', 'Engineering']\n",
|
||
"['G2.com,', 'Inc']\n",
|
||
"['Petroleum', 'Analyzer', 'Company']\n",
|
||
"['Quality', 'Built']\n",
|
||
"['Energy', 'Foundation']\n",
|
||
"['ManyChat,', 'Inc.']\n",
|
||
"['People', 'Untapped']\n",
|
||
"['Optimize', 'Wealth', 'Management']\n",
|
||
"['Perchwell']\n",
|
||
"['Rystad', 'Energy']\n",
|
||
"['DesktopShipper']\n",
|
||
"['New', 'England', 'Mechanical', 'Overlay', '(NEMO)']\n",
|
||
"['Scinapsis', 'Analytics', 'Inc.', 'd.b.a.', 'BenchSci']\n",
|
||
"['Eagle', 'Investment', 'Systems', 'LLC']\n",
|
||
"['Falkonry', 'Inc.']\n",
|
||
"['Exor', 'International', '(USA)']\n",
|
||
"['Outside', 'In']\n",
|
||
"['UX', 'Design', 'Institute']\n",
|
||
"['Studex', 'Corporation']\n",
|
||
"['Givergy']\n",
|
||
"['Porch']\n",
|
||
"['StatesServ/Hospicelink']\n",
|
||
"['The', 'a2', 'Milk', 'Company']\n",
|
||
"['Bungii,', 'LLC']\n",
|
||
"['My', 'Food', 'Bag']\n",
|
||
"['Data', 'Words']\n",
|
||
"['Dr.', 'First']\n",
|
||
"['Sidekick', 'Health']\n",
|
||
"['Sirma', 'AI', 'AD,', 'trading', 'as', 'Ontotext']\n",
|
||
"['CHEK', 'Institute']\n",
|
||
"['Universal', 'Technical', 'Resource', 'Services,', 'Inc.']\n",
|
||
"['Falkonry', 'Inc.']\n",
|
||
"['MOLITOR', 'Formations']\n",
|
||
"['Sword', 'Security']\n",
|
||
"['Missouri', 'State', 'Employees', 'Retirement', 'System']\n",
|
||
"['Pickup', 'Now', 'Inc']\n",
|
||
"['Papa', 'Technologies', 'LLC']\n",
|
||
"['Marble', 'Law', 'Firm']\n",
|
||
"['Parchment,', 'Inc.']\n",
|
||
"['Well', 'Health,', 'Inc.', 'dba', 'Artera']\n",
|
||
"['Skuid']\n",
|
||
"['Veriff', 'OÜ']\n",
|
||
"['Soham,', 'Inc.', '(Zenoti)']\n",
|
||
"['Caliza', 'DBA', 'Homesie/Landing']\n",
|
||
"['iAdvize', 'SAS']\n",
|
||
"['Milner', 'International', 'College', 'of', 'English']\n",
|
||
"['Adkins', 'Management', 'and', 'Consulting']\n",
|
||
"['Partender,', 'Inc.']\n",
|
||
"['Hometime']\n",
|
||
"['National', 'Wildlife', 'Federation']\n",
|
||
"['Petland', 'Kennesaw']\n",
|
||
"['Petland', 'Mall', 'of', 'Georgia']\n",
|
||
"['Lash', 'Affair', 'by', 'J.', 'Paris,', 'LLC']\n",
|
||
"['Freshworks,', 'Inc.']\n",
|
||
"['Petland', 'Summerville']\n",
|
||
"['FacilisGroup']\n",
|
||
"['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n",
|
||
"['Sweet', 'Cow', 'Ice', 'Cream']\n",
|
||
"['Kovo', 'Inc.']\n",
|
||
"['Anchor', 'Labs,', 'LLC']\n",
|
||
"['HINKLEY,', 'INC.']\n",
|
||
"['Solidus', 'Labs']\n",
|
||
"['Ogi']\n",
|
||
"['LTK']\n",
|
||
"['The', 'CX', 'Company']\n",
|
||
"['Wag', 'Labs,', 'Inc.']\n",
|
||
"['Paramount', 'Commerce']\n",
|
||
"['Akua', 'Mind', '&', 'Body']\n",
|
||
"['Anchain.ai']\n",
|
||
"['Pipedrive,', 'Inc.']\n",
|
||
"['Canavan', 'Byrne']\n",
|
||
"['Alston', 'Construction']\n",
|
||
"['Keystone', 'RV', 'Company']\n",
|
||
"['LJ', 'Hooker', 'Corporation', 'Pty', 'Ltd']\n",
|
||
"['Zenoti(Toni', '&', 'Guy)']\n",
|
||
"['National', 'Contracting', 'Center']\n",
|
||
"['Wild', 'Health']\n",
|
||
"['TripleSeat']\n",
|
||
"['Cloudticity']\n",
|
||
"['OvalEdge']\n",
|
||
"['ForceManager']\n",
|
||
"['Remind']\n",
|
||
"['LoadUp', 'Technologies,', 'LLC']\n",
|
||
"['Zenjob', 'GmbH']\n",
|
||
"['AgencyBloc']\n",
|
||
"['Serge']\n",
|
||
"['English', 'for', 'Asia']\n",
|
||
"['uConnect']\n",
|
||
"['Plan', 'To', 'Protect']\n",
|
||
"['Bonfire', 'Interactive', 'Ltd.']\n",
|
||
"['RogueHire,', 'LLC']\n",
|
||
"['Total', 'Dealer', 'Compliance']\n",
|
||
"['SalesForce,', 'Inc.']\n",
|
||
"['STT', 'Security', '&', 'Investigative', 'Services']\n",
|
||
"['Encore', 'Healthcare']\n",
|
||
"['Engel', '&', 'Völkers', 'Americas,', 'Inc']\n",
|
||
"['Herrmann']\n",
|
||
"['Cayuse']\n",
|
||
"['Ursa', 'Major', 'Technologies']\n",
|
||
"['The', 'Federal', 'Liberal', 'Agency', 'of', 'Canada']\n",
|
||
"['Company', 'Nurse']\n",
|
||
"['WeTravel']\n",
|
||
"['MarketScale']\n",
|
||
"['Mizuno', 'Usa,', 'Inc.']\n",
|
||
"['At', 'World', 'Properties,', 'LLC']\n",
|
||
"['Talener']\n",
|
||
"['Practo', 'PTE', 'Ltd.']\n",
|
||
"['Prenda']\n",
|
||
"['Arrcus']\n",
|
||
"['Yondr', 'USA', 'LLC']\n",
|
||
"['Scrivas']\n",
|
||
"['AutoServe1']\n",
|
||
"['InvestisDigital']\n",
|
||
"['Eldersource']\n",
|
||
"['Dialog']\n",
|
||
"['Own', 'Solutions', 'Financial', 'Services', 'Limited']\n",
|
||
"['Crayon']\n",
|
||
"['Slope', 'Software']\n",
|
||
"['Rokt', 'Pte', 'Ltd']\n",
|
||
"['CyberCube']\n",
|
||
"['Everlaw']\n",
|
||
"['Compass', 'RE']\n",
|
||
"['Solo']\n",
|
||
"['Dolly', 'Inc.']\n",
|
||
"['LTK']\n",
|
||
"['Access', 'E-Forms']\n",
|
||
"['Training', 'Concepts']\n",
|
||
"['Luma', 'Health,', 'Inc.']\n",
|
||
"['FRSecure']\n",
|
||
"['MedicalDirector']\n",
|
||
"nan\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
"# Split every account name in `arr`'s index into whitespace-separated tokens.\n",
"# `Index.astype` returns a new Index instead of converting in place, so its\n",
"# result has to be used; missing labels are dropped first so that NaN is\n",
"# neither split nor printed.\n",
"account_names = arr.index.dropna().astype(str)\n",
"words = account_names.str.split()\n",
"for word in words:\n",
"    print(word)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"354\n",
|
||
"509\n",
|
||
"nan\n",
|
||
"<class 'str'>\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
"# Pool the index labels of `arr` and `pages` into one list of candidate names.\n",
"acct_list2 = pages.index.to_list()\n",
"acct_list = arr.index.to_list() + acct_list2\n",
"print(len(arr.index))\n",
"print(len(acct_list2))\n",
"\n",
"# Missing labels show up as float NaN. Build a filtered copy instead of calling\n",
"# list.remove() inside a loop over the same list: removing while iterating\n",
"# shifts the remaining items left, so the entry right after each removal is\n",
"# never examined.\n",
"acct_list = [item for item in acct_list if not isinstance(item, float)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"SilverCar Silvercar\n",
|
||
"Landis LandisU\n",
|
||
"AIIM AIIM+\n",
|
||
"DesktopShipper Desktop Shipper\n",
|
||
"TripleSeat Tripleseat\n",
|
||
"InvestisDigital Investis Digital\n",
|
||
"Bolt Volt\n",
|
||
"Landis LandisU\n",
|
||
"Ocean Oceana\n",
|
||
"Skuad Skool Skuid Skool\n",
|
||
"10\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
"# Collect pairs of account names whose `lev` distance is greater than 0 and\n",
"# less than 2 (`lev` is defined in another cell; presumably a Levenshtein edit\n",
"# distance -- confirm against its import).\n",
"# The pooled list can contain the same name more than once, which makes a pair\n",
"# appear repeatedly, so each distinct name is compared once, in first-seen\n",
"# order. Non-string entries such as NaN are skipped because they are not names.\n",
"unique_names = [name for name in dict.fromkeys(acct_list) if isinstance(name, str)]\n",
"\n",
"tup_list = []\n",
"for name1, name2 in itertools.combinations(unique_names, 2):\n",
"    distance = lev(name1, name2)\n",
"    if 0 < distance < 2:\n",
"        print(name1, name2)\n",
"        tup_list.append((name1, name2))\n",
"\n",
"print(len(tup_list))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "jupyter",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.1"
|
||
},
|
||
"orig_nbformat": 4
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|