{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import pandas as pd \n", "import numpy as np \n", "import seaborn as sns \n", "import matplotlib.pylab as plt \n", "import sketch\n", "plt.style.use('ggplot')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "pages = pd.read_csv(\"/Users/normrasmussen/Downloads/app_usage.csv\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(59433, 3)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pages.shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
School NamePathMar 13 - Apr 11
0Vivvix University for Ad Intel/courses25
1Vivvix University for Ad Intel/previews16
2Vivvix University for Ad Intel/account_links11
3Vivvix University for Ad Intel/analytics8
4Vivvix University for Ad Intel/settings/general/edit7
5Vivvix University for Ad Intel/apps/integrations4
6Vivvix University for Ad Intel/accounts/general/edit4
7Vivvix University for Ad Intel/people3
8Vivvix University for Ad Intel/settings/authentication3
9Vivvix University for Ad Intel/styling/general/edit2
\n", "
" ], "text/plain": [ " School Name Path Mar 13 - Apr 11\n", "0 Vivvix University for Ad Intel /courses 25\n", "1 Vivvix University for Ad Intel /previews 16\n", "2 Vivvix University for Ad Intel /account_links 11\n", "3 Vivvix University for Ad Intel /analytics 8\n", "4 Vivvix University for Ad Intel /settings/general/edit 7\n", "5 Vivvix University for Ad Intel /apps/integrations 4\n", "6 Vivvix University for Ad Intel /accounts/general/edit 4\n", "7 Vivvix University for Ad Intel /people 3\n", "8 Vivvix University for Ad Intel /settings/authentication 3\n", "9 Vivvix University for Ad Intel /styling/general/edit 2" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pages.head(10)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "pages_sum = pd.DataFrame(columns=['Total Views'])\n", "pages_sum['Total Views'] = pages.groupby(by='School Name')['Mar 13 - Apr 11'].sum()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total Views
School Name
Vivvix for Ad Intel110
'@properties322
4 Pillars112
AIIM+528
ASAPP153
| AgencyBloc3373
Académie Évolupharma253
Access11
ActiveLearner1907
Ad Intel2
\n", "
" ], "text/plain": [ " Total Views\n", "School Name \n", " Vivvix for Ad Intel 110\n", "'@properties 322\n", "4 Pillars 112\n", "AIIM+ 528\n", "ASAPP 153\n", " | AgencyBloc 3373\n", "Académie Évolupharma 253\n", "Access 11\n", "ActiveLearner 1907\n", "Ad Intel 2" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "to_remove = ['Academy', 'academy', 'University', 'university', 'school', 'School', 'Sandbox', 'Knowledge Base']\n", "for item in pages_sum.itertuples(index=True):\n", " for word in to_remove:\n", " if word in item[0]:\n", " less = item[0].replace(word, '')\n", " pages_sum.rename(index={item[0]:less}, inplace=True)\n", "\n", "pages_sum.head(10)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "pages_sum.index = pages_sum.index.str.strip()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'Total Views': {'Vivvix for Ad Intel': 110, \"'@properties\": 322, '4 Pillars': 112, 'AIIM+': 528, 'ASAPP': 153, '| AgencyBloc': 3373, 'Académie Évolupharma': 253, 'Access': 11, 'ActiveLearner': 1907, 'Ad Intel': 765, 'AdCellerant': 5, 'Adkins': 107, 'Agility': 156, 'AgriWebb': 567, 'AgriWebb Brazil': 333, 'Akua Mind Body': 578, 'AlphaSense': 17, 'Altenew': 859, 'Altima Dental': 33, 'American Concrete Pipe Association Learning Center': 2864, 'AnChain.AI': 437, 'AndHealth's': 2628, 'Andhealth Internal Training': 17, 'Ansley': 116, 'Anthology': 13, 'Anthology Internal': 104, 'Anthology Trial': 418, 'Aquent's Learning Lab': 16, 'Arrcus': 8, 'Artera': 54, 'Ascend Training by Stenograph': 9, 'AtlasIED': 125, 'Atlassian': 41, 'AutoServe1': 4, 'Autoserve1 New': 1, 'BCNDP Caucus': 26, 'BLVD Mentor Learning Library': 223, 'BNY Mellon OMNI Digital': 364, 'Bartosz's': 3, 'Bellhop': 118, 'Belmont Farm Nursery': 29, 'BenchSci': 1659, 'Big Ideas Learning': 167, 'Big Ideas Learning Sandbox': 7, 'BirchStreet': 11, 'BirchStreet Trial': 446, 'Bisla': 402, 'Blacklane Chauffeur Learning': 263, 'Blacklane Dubai Chauffeurs': 272, 'Blacklane Learning & Development': 1913, 'Blackstone Industrial': 42, 'Bolt': 1177, 'BombBomb Studios': 673, 'Bonfire': 19, 'Boon Edam': 164, 'Brand Addition's': 457, 'Brian's Starfleet': 3, 'Brighton Science': 1703, 'Bungii Operations': 155, 'CATalyst': 161, 'CHEK Institute': 11138, 'CIBC FirstCaribbean Online and Mobile Banking': 5, 'CIRE': 149, 'CMIC': 200, 'CONCEPT ACADEMY': 468, 'CORE College': 1419, 'CRM Training Courses and Tutorials | Pipedrive Learn': 984, 'CSNews Technology': 1, 'CTAA's': 1947, 'Canavan Byrne': 1380, 'Capillary': 1473, 'Cappfinity': 108, 'Capstone Scholars Program': 456, 'Cayuse': 99, 'Censis': 1300, 'Channable Internal': 551, 'Channable': 40, 'Chann': 152, 'Chico Electric': 135, 'Cholodecki current': 1, 'Christian': 5, 'Christie's International Real Estate pl@tform': 273, 'ChurnZero': 41, 'Clay Clerk': 525, 'CloudRadial': 455, 'Cloudticity': 27, 'Cluid Housing': 65, 'Coastal Consulting': 114, 'Cofense': 129, 'Community Fire Prevention': 37, 'Compass': 4327, 'Compass U': 265, 'Competitive Advantage': 48, 'Con-Trol Training System': 816, 'Con-Trol Training System - BambooHR': 179, 'Conlon's': 598, 'Cook Brother's Bars': 361, 'Corporate Traveler': 3, 'Curso de Formação': 93, 'CurveBeam': 87, 'DELETE - Airbnb Host Hub': 12, 'DELETE - Airbnb Photography': 13, 'DELETE - BLE Dev': 14, 'DELETE - Bright Line Hub': 24, 'DELETE - Bright Liners Hub': 1020, 'DELETE - Dealerware': 14, 'DELETE - Dynamic Yield': 14, 'DELETE - Gett': 13, 'DELETE - H1 Insights Customer': 21, 'DELETE - H1 Insights Inc': 47, 'DELETE - Happy Culture': 15, 'DELETE - Numerator for Promotions Intel': 15, 'DELETE - OTHRSource': 16, 'DELETE - PeopleDev': 14, 'DELETE - Pixability's': 1, 'DELETE - SANDBOX Bright Liners Hub': 28, 'DELETE - The GLAMSQUAD Online Learning Center': 16, 'DELETE - Uber Compliance El Salvador': 14, 'DELETE - Uber Ecuador': 13, 'DELETE - Uber Peru': 19, 'DELETE - Uber Peru Compliance': 14, 'DELETE - Yotpo': 13, 'DV Learning': 601, 'Datawords': 221, 'Dell AMD Learning': 1192, 'Dept Agency': 575, 'Desktop Shipper': 875, 'Devils': 9, 'Dolly': 23, 'DoorDash Shop & Deliver': 10, 'DoorDash's': 592, 'DoubleVerify': 89, 'Douglas Elliman MyLearning.Elliman.com': 943, 'Doximity Compliance Learning': 27, 'Doximity': 183, 'Dray Alliance': 207, 'EASE': 373, 'EF High Exchange Year: IEC': 1703, 'EF High Exchange Year: Staff': 6, 'EF host family resource center': 29, 'EF student learning center': 74, 'ERA Key Realty Services': 66, 'ES&E': 329, 'EXOR & CORVINA': 189, 'ElderSource Ecommerce': 102, 'ElderSource Internal': 16, 'ElderSource Partner': 4, 'Election Protection': 39, 'Embee': 4, 'Encore Healthcare': 290, 'Energy Foundation': 490, 'Engel & Völkers': 566, 'English for Asia': 2270, 'Equip By Equiem': 143, 'Essentium Training Center': 11, 'Everlaw Certification Center': 33, 'Evolve IP': 426, 'Excalibur Data Systems Online Learning': 123, 'FBD Frozen': 27, 'FCA': 1061, 'FCTG Americas': 3474, 'FRSecure': 407, 'Facilis': 308, 'Factorial': 1593, 'Feldman Geospatial Learning Management System': 289, 'FeverBee's Professional Community Management Courses': 11, 'Figma': 1, 'First Mile's': 279, 'Flink': 1472, 'Foleon': 1776, 'Football Marketing Asia': 12, 'For The Children': 319, 'Force': 3, 'Formation Planifions pour protégerMC': 46, 'Formation de l'Équipe Trudeau': 50, 'Founder Portal': 3, 'Freshworks': 259, 'Front': 975, 'Futuri': 152, 'G2's': 293, 'GLAS Agency': 4, 'Garrett Virtual': 376, 'GenTech Associates': 4599, 'GetPixel': 1969, 'Givergy': 146, 'Glassdoor': 1112, 'Global Systems Engineering': 25, 'GoGuardian': 94, 'GomezLee Marketing': 3, 'Grammarly': 15, 'Grow WELL': 907, 'HARDSKILLS': 2338, 'HARDSKILLS-DEMO': 442, 'HELPme': 163, 'HackerRank': 164, 'Hanna': 1440, 'Harri': 609, 'Harri Employee': 8, 'Harri's for McDonalds': 2482, 'Harri's Client Onboarding (IPC)': 18, 'Harri's Journey to Success': 412, 'HealthFirst Training': 286, 'Healthiverse': 2492, 'Hello Insight': 1, 'Herrmann Asia Learning Portal': 1681, 'Hinkley': 535, 'Hometime Hub': 184, 'HopSkipDrive Resources': 780, 'Hornbill': 2, 'Horsepower Brands': 2540, 'Houwzer': 7, 'Hubert dykiel's': 8, 'Humly': 656, 'Hunt Real Estate': 839, 'ISOutsource': 125, 'IceMalta': 148, 'Internal HackerRank': 119, 'Investis Digital': 188, 'Jets Gymnastics': 369, 'Jimmy Brings': 506, 'Jonny davies's': 3, 'Kadince': 203, 'Keystone RV Company': 114, 'Knox': 90, 'Kovo': 5, 'LJ Hooker': 48, 'LTK': 563, 'Landis Customer': 10, 'Landis Internal': 26, 'Landis': 30, 'LandisU': 30, 'Lash Affair': 56, 'LaunchThat': 263, 'Lawpath': 193, 'Learn.kiwi.com - Learning and Development': 623, 'Learning Lab': 99, 'Levi green's': 7, 'Library': 69, 'Life House': 109, 'Light Brigade': 302, 'Lighthouse by LJ Hooker Group': 1378, 'Lightspeed': 4, 'LiquidSpace's': 2, 'Little Harvard': 1, 'LoadUp': 183, 'Lukasz cholodecki's': 6, 'Luma Health U': 1, 'Luma U': 3459, 'Luminate': 1, 'Luvo': 594, 'MANTA': 13, 'MANTA Hub': 6, 'MASSÉNA Formations': 4009, 'MC Personalization eCampus': 977, 'MaRS Discovery District': 463, 'Magis Center': 2136, 'Malta International Airport | Northpass eLearning Hub': 1084, 'Marble Law': 538, 'Mark43 Institute': 152, 'Mark43': 323, 'MarketScale Creator Community': 499, 'Marketing Pony': 1, 'Marketscale Director's Chair': 20, 'Marta's': 133, 'Matt cochran's': 62, 'Mattr': 31, 'Maven Care Team': 1579, 'Maven Clinic's': 92, 'MedicalDirector's': 62, 'Meditech': 8, 'MicroFour': 173, 'Minga': 39, 'Missouri State Employees Retirement System': 1, 'Mizuno': 895, 'Mizuno Running': 195, 'Motrain': 427, 'Murphy Geospatial Northpass Account': 3, 'My Food Bag': 49, 'My Meeting Courses': 767, 'MyAuPairCourses': 103, 'MyEquip By Equiem': 352, 'NCC's': 15, 'NCC's Medicare Sales': 518, 'NEMO Safety Training': 255, 'Nana': 270, 'Napa Center': 110, 'Neil Kelly': 256, 'Nest U': 434, 'Netradyne': 1485, 'Network First': 1738, 'Network Q': 867, 'Next Theme Test': 7, 'NextIQ': 122, 'North Capital': 7, 'Northpass Catalog': 28, 'Northpass Product Training': 37, 'Northpass for Employees': 65, 'Noventiq': 86, 'Ocean': 245, 'Oceana': 26, 'Ogi': 156, 'Oiltanking Training': 847, 'Omnisend': 123, 'OnDemand by enChoice': 7, 'Onclusive': 230, 'Ontotext': 10, 'Oodrive': 1590, 'OpenSesame': 7, 'Optimize': 24, 'Ornikar': 408, 'Otoqi': 397, 'Outside In': 909, 'OvalEdge': 213, 'Own Solutions and Aplauz': 223, 'PAC': 17, 'PAC -Team Learning': 2, 'PCTEL': 720, 'PICKUP HR': 19, 'PICKUP SOP Instance': 229, 'PICKUP': 14, 'PITM': 147, 'PSC's Professional Development Platform': 198, 'Pablo & Rusty's Online Education': 59, 'Pal': 179, 'Papa Internal': 2489, 'Parchment Learn': 394, 'Parting Stone': 3018, 'Path Onboarding': 1040, 'PebblePad': 172, 'People Untapped Online Learning Centre': 697, 'Pepper Content': 344, 'Perchwell': 42, 'Petal Solutions Inc.': 424, 'Phoy phen's': 2, 'Pipedrive': 236, 'Pipedrive Sandbox': 70, 'Pipedrive Learn': 5, 'Plan to Protect® On-Line Training': 1153, 'Plan to Protect® dwell Abuse Awareness Training (in partnership with New Apostolic Church Canada)': 14, 'PoolParty': 52, 'Porch': 140, 'Prenda': 818, 'Prism Maritime': 61, 'Procuro': 387, 'Psynapse Education': 32, 'Puls Tech Resource Center': 873, 'Puls Technician Resource Center': 1, 'Quality Built': 226, 'Raptor': 119, 'Real Estate': 1411, 'Recast': 1200, 'ReconArt Studio': 195, 'Red Lion Controls'': 258, 'Remind Learning Center': 11, 'Remind': 3, 'Renaissance': 25, 'Renaissance U': 53, 'Riviera Partners': 66, 'Rob's': 3, 'Roboflow': 15, 'Robot Bootcamp (Internal)': 433, 'Roosted': 233, 'Rootstrap': 1759, 'Ryan agnello's': 83, 'Rystad Energy Employee Training Center': 84, 'Rystad Energy': 179, 'Rystad Energy's Learning Center': 3059, 'S-Docs': 31, 'SIT Online': 140, 'STOPit SEL, Safety, & Wellness Training': 389, 'STOPit Solutions': 185, 'STT Security': 247, 'STUDEX Germany': 4, 'STUDEX Mexico': 3, 'STUDEX Poland': 4, 'STUDEX USA': 517, 'STUDEX United Kingdom': 289, 'Safic'': 105, 'Scenario': 958, 'of Rokt': 410, 'ScreenBeam Learning': 7, 'ScreenBeam's Global': 6, 'Scrivas': 133, 'Securonix': 1014, 'Securonix Internal': 12, 'Securonix Partner Program': 351, 'Serge': 1369, 'Service Coordination Unlimited': 325, 'Shine Bright Care Training': 37, 'ShipEx's': 248, 'Shipt Passport': 1034, 'Shopify Certification': 30, 'Shopify GO': 6, 'Shopify Ignite': 12, 'Shopify Partner': 1, 'Showdigs': 1137, 'Sidekick': 249, 'Silvercar': 29, 'SimplrFlex': 97, 'Skan': 263, 'Skin Laundry': 52, 'Skuad Skool': 461, 'Skuid Customer': 3, 'Skuid Skool': 674, 'Smart Start Training Modules': 6, 'SmartPM': 200, 'Solo Certified': 54, 'Solo Internal Training': 399, 'Spark Driver Resources Hub': 1093, 'SpedTrack': 197, 'Splash': 2, 'Spotlight': 1798, 'Spring Point': 249, 'SquareTrade GO': 99, 'Staff Courses': 21, 'StateServ Training': 2103, 'Stenograph': 3416, 'Stenograph Education Network': 16, 'Strategos': 239, 'SubItUp': 13, 'Swift': 843, 'Swift Medical': 57, 'Sword Security': 311, 'TONI&GUY LEARNING HUB': 1066, 'TZA': 47, 'Talent Finder Resource Hub': 53, 'Talkspace': 240, 'Talkspace - Northpass': 1031, 'Talkspace National Practice Provider Training': 46, 'Teach Plus': 142, 'Team Trudeau Training': 23, 'Terminus': 77, 'The CX': 874, 'The Heico Companies': 4, 'The Knowledge | MATTR': 1225, 'The Latimer Group': 92, 'The Recruiter': 1276, 'Tidel's': 848, 'Tomasz król's': 100, 'Toni&Guy MVP': 13, 'Toni&Guy Passport': 7, 'Tonkean': 11, 'Total Dealer Compliance': 110, 'Trackforce Valiant': 146, 'Training Center': 3205, 'Training Concepts': 842, 'Trexity': 168, 'Trinity College London: Trinity Teach': 19, 'Tripleseat': 274, 'Turo': 27, 'Turo Learning Hub': 95, 'Turo On-Call': 11, 'UTRS Learning Management System': 111, 'UX Design - Learning Paths': 1, 'UX Design Institute': 3583, 'Universal': 1345, 'Upland CXM': 28, 'Upland Support': 1572, 'Upland': 1138, 'Uptick': 235, 'Ursa Major Training': 1071, 'Venair': 1095, 'Veriff's': 78, 'Ververica': 119, 'Vision Excellence Institute': 2, 'Viva Wallet': 2365, 'Voigt Smith Innovation': 1, 'Volt': 408, 'Volt-staging': 18, 'Volumental': 912, 'WELL Health Partners': 25, 'Wag!': 15, 'Walmart Luminate's': 482, 'Walmart Luminate's (MIGRATED TO AZURE)': 21, 'Walmart': 29, 'WeTravel Courses': 111, 'Webdox': 78, 'Whitetail Properties Education Platform': 3604, 'Wild Health': 756, 'Wild Health Education Portal': 620, 'Wiliot': 22, 'Will kozinski's': 31, 'Williams Sonoma Trial': 9, 'Winchester Carlisle': 213, 'Wingz': 247, 'Women's Resource Center': 7, 'Yakademy': 1, 'Your Pathway Home': 3, 'ZENOTI': 42, 'ZU EWC': 14, 'ZU Sorbet': 8, 'Zenjob': 406, 'Zenoti Employee Training portal': 306, 'Zenoti QA': 70, 'Zenoti': 3639, 'Zion Engagement and Planning': 5, '[Sandbox] Charlie's': 15, '[Sandbox] Kaitlyn Folsom's': 2, 'beqom': 317, 'd.': 170, 'dbaPlatform': 82, 'iAdvize': 241, 'iConnect Training': 182, 'i': 467, 'iorad': 12, 'my LCC Courses!': 1410, 'occy': 207, 'uConnect': 133, 'Łukasz Wnęk - production': 29}}\n" ] } ], "source": [ "test_dict = pages_sum.to_dict()\n", "print(test_dict)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Twotal
School Name
'@properties322
4 Pillars112
AIIM+528
ASAPP153
Académie Évolupharma253
......
Yakademy1
Your Pathway Home3
ZENOTI42
ZU EWC14
ZU Sorbet8
\n", "

489 rows × 1 columns

\n", "
" ], "text/plain": [ " Twotal\n", "School Name \n", "'@properties 322\n", "4 Pillars 112\n", "AIIM+ 528\n", "ASAPP 153\n", "Académie Évolupharma 253\n", "... ...\n", "Yakademy 1\n", "Your Pathway Home 3\n", "ZENOTI 42\n", "ZU EWC 14\n", "ZU Sorbet 8\n", "\n", "[489 rows x 1 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pages_three = pd.DataFrame(columns=['Twotal'])\n", "pages_three['Twotal'] = pages_sum.groupby(['School Name'])['Total Views'].sum()\n", "pages_three.head(-20)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "pages_three.loc[pages_three.index == 'Zenjob']\n", "acct_list = pages_three.index.to_list()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Bolt Volt\n", "Landis LandisU\n", "Ocean Oceana\n", "Skuad Skool Skuid Skool\n" ] } ], "source": [ "from Levenshtein import distance as lev\n", "import itertools\n", "\n", "acct1 = []\n", "comp = []\n", "for (\n", " name1,\n", " name2,\n", ") in itertools.combinations(acct_list, 2):\n", " # print(name1, name2) - prints all pairs, working so far.\n", " distance = lev(name1, name2)\n", " # print(distance) - successfully returns numbers\n", " if distance > 0 and distance < 2:\n", " print(name1, name2)\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Stored 'pages' (DataFrame)\n" ] } ], "source": [ "pages = pages_three\n", "%store pages" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "arr = pd.read_csv(\"/Users/normrasmussen/Downloads/cust_arr.csv\", index_col=0, header=[0])\n", "arr.fillna(0, inplace=True)\n", "arr.replace(',','', regex=True, inplace=True)\n", "arr['ARR'] = arr['ARR'].astype(int)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "arr = arr.drop(axis=1, columns=['Renewaldate Date', 'CSM'])" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(354, 1)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr.shape" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Stuart']\n", "['HelloTech', 'Inc.']\n", "['Shopify', 'Inc.']\n", "['Hardskills', 'Pte.', 'Ltd']\n", "['Shipt']\n", "['SPS', 'Commerce']\n", "['CDP']\n", "['Altenew']\n", "['BrightMove']\n", "['SITA', 'B.V.', 'c/o', 'Malta', 'International', 'Airport']\n", "['Football', 'Marketing', 'Asia']\n", "['Prism', 'Maritime']\n", "['Skuid']\n", "['SmartPM', 'Technologies']\n", "['DoorDash,', 'Inc.']\n", "['International', 'Care', 'Ltd.']\n", "['BrightMove']\n", "['Epiphany', 'Healthcare']\n", "['Artsy']\n", "['Évolupharma,', 'Inc.']\n", "['UAB', 'Omnisend']\n", "['Epiphany', 'Healthcare']\n", "['Artsy']\n", "['Renu', 'Contracting', 'Restoration']\n", "['Minga']\n", "['Renu', 'Contracting', 'Restoration']\n", "['Viva', 'Online', 'Services', 'SA']\n", "['Health', 'Current']\n", "['Cherokee', 'Enterprises,', 'Inc.']\n", "['Kinetica']\n", "['Innovaccer', 'Inc.']\n", "['Bellhop,', 'Inc']\n", "['ISOutsource']\n", "['Safic-Alcan']\n", "['PetalMD']\n", "['GomezLee', 'Marketing']\n", "['PSC', 'Consulting']\n", "['STATCARE', 'URGENT', 'CARE']\n", "['Tru', 'Realty']\n", "['Netradyne']\n", "['H1', 'Insights,', 'Inc.']\n", "['Hardskills', 'Pte.', 'Ltd']\n", "['BambooHR']\n", "['Velocity', 'Risk', 'Underwriters,', 'LLC']\n", "['Harri', 'LLC']\n", "['Rootstrap']\n", "['First', 'Mile']\n", "['Lawpath']\n", "['Brand', 'Addition']\n", "['Roboflow']\n", "['Oiltanking', 'GmbH']\n", "['Safari', 'Energy']\n", "['Larson', 'Texts']\n", "['Spring', 'Point']\n", "['Legislative', 'Assembly', 'of', 'British', 'Columbia']\n", "['National', 'Campus', 'and', 'Community', 'Radio', 'Association']\n", "['Psynapse', 'Psychometrics', 'Pty', 'Ltd']\n", "['Portnoy,', 'Messinger,', 'Pearl', '&', 'Associates,', 'Inc.']\n", "['Trax', 'Technology', 'Solutions', 'Pte', 'Ltd']\n", "['Uber', 'London', 'Limited', '(POOL)']\n", "['Ansan', 'Industries', 'Ltd.']\n", "['connectRN', 'Inc.']\n", "['Usdan', 'Summer', 'Camp', 'for', 'the', 'Arts']\n", "['Carlson', 'Capital', 'Management']\n", "['Safari', 'Energy']\n", "['Surmesur']\n", "['connectRN', 'Inc.']\n", "['ManyChat,', 'Inc.']\n", "['Service', 'Coordination', 'Unlimited']\n", "['Doximity', 'Inc.']\n", "['Starwood', 'Pet', 'Travel']\n", "['Future', 'Point', 'of', 'View', '(Tri-Corps)']\n", "['Form', 'Energy']\n", "['Channable']\n", "['Hammer', 'Nutrition']\n", "['Uber', 'B.V.', '-', 'Uber', 'LatAm', 'Compliance']\n", "['Securonix', 'Inc.']\n", "['HopSkipDrive']\n", "['Kiwi']\n", "['People', 'Untapped']\n", "['beqom']\n", "['Casio', 'America', 'Inc']\n", "['Riviera', 'Partners', 'LLC']\n", "['The', 'Latimer', 'Group']\n", "['Viveve']\n", "['Volt', 'Systems']\n", "['OTHRSource']\n", "['Viveve']\n", "['Material', 'Control', 'Systems,', 'Inc.']\n", "['Evolve', 'IP']\n", "['Cook', 'Brothers', 'Bars']\n", "['STOPit', 'Solutions']\n", "['Broche', 'Ballet']\n", "['Virtually', 'Human', 'Studio']\n", "['BrightLine', 'Eating', 'Solutions', 'LLC']\n", "['AgriWebb']\n", "['Wingz,', 'Inc.']\n", "['Noventiq', 'Holdings', 'plc']\n", "['Corporate', 'Traveler', 'UK']\n", "['Kadince']\n", "['AndHealth']\n", "['Swift', 'Medical']\n", "['Agility', 'Inc.']\n", "['ZyXel', 'Communications', 'Inc']\n", "['UX', 'Design', 'Institute']\n", "['STATCARE', 'URGENT', 'CARE']\n", "['MaRS', 'Discovery', 'District']\n", "['Altima', 'Dental', 'Canada']\n", "['Aquent']\n", "['FeverBee']\n", "['Company', 'Nurse']\n", "['DoubleVerify']\n", "['Pepper', 'Content', 'Inc']\n", "['NextHealth']\n", "['Community', 'Transportation', 'Association', 'of', 'America']\n", "['Teach', 'Plus']\n", "['Winchester', 'Carlisle']\n", "['Ceres', 'University']\n", "['BrandActive', 'International', 'Inc.']\n", "['Psychiatry-UK']\n", "['Horsepower', 'Brands']\n", "['Dept', 'Holding', 'B.V.']\n", "['BioLife', 'Solutions,', 'Inc.']\n", "['Walmart', 'Inc.']\n", "['Conlon', 'Construction']\n", "['Nana', 'Technologies', 'Inc.']\n", "['Turo,', 'Inc.']\n", "['BombBomb']\n", "['The', 'Financial', 'Times', 'Limited']\n", "['Southeastrans', 'Inc.']\n", "['Type', 'A', 'Media']\n", "['Jets', 'Gymnastics']\n", "['Reconart']\n", "['Excalibur', 'Data', 'Systems']\n", "['Parkopoly']\n", "['Strategos', 'International']\n", "['Douglas', 'Elliman', 'Real', 'Estate']\n", "['Oodrive']\n", "['Swift', 'Medical']\n", "['Canavan', 'Byrne']\n", "['Dray', 'Alliance', 'Inc.']\n", "['CanFi', 'Empowerment', 'Education', 'Corp']\n", "['Axios', 'Media', 'Inc.']\n", "['Volumental']\n", "['Oceana,', 'Inc.']\n", "['LaunchThat']\n", "['UX', 'Design', 'Institute']\n", "['Amyris']\n", "['Shine', 'Bright', 'Care', 'LLC']\n", "['Alston', 'Construction']\n", "['Hello', 'Customer']\n", "['S-Docs']\n", "['Neil', 'Kelly', 'Company']\n", "['Squaretrade', 'Inc.']\n", "['Everyday', 'Software,', 'SL']\n", "['Webdox', 'CLM']\n", "['Hazlett', 'Tree', 'Service']\n", "['High', 'Frequency']\n", "['CloudRadial']\n", "['Ideoclick']\n", "['Equiem']\n", "['Long', 'Beach', 'Community', 'College', 'District', '-', 'LA', 'SBDC']\n", "['enChoice', 'Inc']\n", "['Royal', 'Family', 'Kids,', 'Inc.']\n", "['SilverCar']\n", "['Anchor', 'QEA,', 'LLC']\n", "['Maven', 'Clinic']\n", "['Flight', 'Centre']\n", "['Shoplogix', '(Asia', 'Pacific)', 'Limited', '(Formerly', 'Projection', 'HK)']\n", "['North', 'Capital']\n", "['Johnson', '&', 'Johnson', 'Surgical', 'Vision,', 'Inc.']\n", "['Hexa', 'Assessments', 'dba', 'Talexes']\n", "['Raketech', 'Group']\n", "['EnsembleIQ']\n", "['Ogi']\n", "['Grammarly']\n", "['PSC', 'Consulting']\n", "['Trinity', 'College', 'London']\n", "['Estes', 'Construction']\n", "['FIVE', 'Holdings,', 'Inc']\n", "[\"Women's\", 'Resource', 'Center']\n", "['Pablo', '&', \"Rusty's\", 'Coffee']\n", "['Lawyers', 'Committee', 'for', 'Civil', 'Rights', 'Under', 'Law']\n", "['Richter10.2', 'Media', 'Group']\n", "['Venair']\n", "['SIT', 'International']\n", "['Cultural', 'Care']\n", "['DoubleVerify']\n", "['Life', 'House']\n", "['ASAPP']\n", "['GLDN']\n", "['Community', 'Fire', 'Prevention']\n", "['Capillary', 'Technologies']\n", "['Landis']\n", "['Blacklane', 'GmbH']\n", "['American', 'Concrete', 'Pipe', 'Association']\n", "['The', 'CORE', 'Group']\n", "['Market', 'Track', 'LLC', 'dba', 'Numerator']\n", "['Medical', 'Practice', 'Management', 'Solutions']\n", "['Recast', 'Software']\n", "['Skan.ai']\n", "['Smart', 'Choice']\n", "['Hello', 'Insight']\n", "['Agility', 'Robotics']\n", "['American', 'State', 'Bank']\n", "['Transifex', 'Opco', 'LLC']\n", "['Cappfinity']\n", "['Houwzer,', 'Inc']\n", "['Feldman', 'Surveyors']\n", "['CognisantMD']\n", "['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n", "['Wiliot']\n", "['Jimmy', 'Brings']\n", "['AIIM']\n", "['Wound', 'Care', 'Advantage']\n", "['One', 'Network', 'Enterprises']\n", "['Harri', 'LLC']\n", "['Community', 'Transportation', 'Association', 'of', 'America']\n", "['Puls']\n", "['Shopify', 'Inc.']\n", "['Semantic', 'Web', 'Company', 'GmbH']\n", "['EF', 'Educational', 'Foundation', 'for', 'Foreign', 'Study']\n", "['Upland', 'Software', 'Inc.']\n", "['Global', 'Systems', 'Engineering']\n", "['G2.com,', 'Inc']\n", "['Petroleum', 'Analyzer', 'Company']\n", "['Quality', 'Built']\n", "['Energy', 'Foundation']\n", "['ManyChat,', 'Inc.']\n", "['People', 'Untapped']\n", "['Optimize', 'Wealth', 'Management']\n", "['Perchwell']\n", "['Rystad', 'Energy']\n", "['DesktopShipper']\n", "['New', 'England', 'Mechanical', 'Overlay', '(NEMO)']\n", "['Scinapsis', 'Analytics', 'Inc.', 'd.b.a.', 'BenchSci']\n", "['Eagle', 'Investment', 'Systems', 'LLC']\n", "['Falkonry', 'Inc.']\n", "['Exor', 'International', '(USA)']\n", "['Outside', 'In']\n", "['UX', 'Design', 'Institute']\n", "['Studex', 'Corporation']\n", "['Givergy']\n", "['Porch']\n", "['StatesServ/Hospicelink']\n", "['The', 'a2', 'Milk', 'Company']\n", "['Bungii,', 'LLC']\n", "['My', 'Food', 'Bag']\n", "['Data', 'Words']\n", "['Dr.', 'First']\n", "['Sidekick', 'Health']\n", "['Sirma', 'AI', 'AD,', 'trading', 'as', 'Ontotext']\n", "['CHEK', 'Institute']\n", "['Universal', 'Technical', 'Resource', 'Services,', 'Inc.']\n", "['Falkonry', 'Inc.']\n", "['MOLITOR', 'Formations']\n", "['Sword', 'Security']\n", "['Missouri', 'State', 'Employees', 'Retirement', 'System']\n", "['Pickup', 'Now', 'Inc']\n", "['Papa', 'Technologies', 'LLC']\n", "['Marble', 'Law', 'Firm']\n", "['Parchment,', 'Inc.']\n", "['Well', 'Health,', 'Inc.', 'dba', 'Artera']\n", "['Skuid']\n", "['Veriff', 'OÜ']\n", "['Soham,', 'Inc.', '(Zenoti)']\n", "['Caliza', 'DBA', 'Homesie/Landing']\n", "['iAdvize', 'SAS']\n", "['Milner', 'International', 'College', 'of', 'English']\n", "['Adkins', 'Management', 'and', 'Consulting']\n", "['Partender,', 'Inc.']\n", "['Hometime']\n", "['National', 'Wildlife', 'Federation']\n", "['Petland', 'Kennesaw']\n", "['Petland', 'Mall', 'of', 'Georgia']\n", "['Lash', 'Affair', 'by', 'J.', 'Paris,', 'LLC']\n", "['Freshworks,', 'Inc.']\n", "['Petland', 'Summerville']\n", "['FacilisGroup']\n", "['Clay', 'County', 'Clerk', 'of', 'Court', 'and', 'Comptroller', 'Office']\n", "['Sweet', 'Cow', 'Ice', 'Cream']\n", "['Kovo', 'Inc.']\n", "['Anchor', 'Labs,', 'LLC']\n", "['HINKLEY,', 'INC.']\n", "['Solidus', 'Labs']\n", "['Ogi']\n", "['LTK']\n", "['The', 'CX', 'Company']\n", "['Wag', 'Labs,', 'Inc.']\n", "['Paramount', 'Commerce']\n", "['Akua', 'Mind', '&', 'Body']\n", "['Anchain.ai']\n", "['Pipedrive,', 'Inc.']\n", "['Canavan', 'Byrne']\n", "['Alston', 'Construction']\n", "['Keystone', 'RV', 'Company']\n", "['LJ', 'Hooker', 'Corporation', 'Pty', 'Ltd']\n", "['Zenoti(Toni', '&', 'Guy)']\n", "['National', 'Contracting', 'Center']\n", "['Wild', 'Health']\n", "['TripleSeat']\n", "['Cloudticity']\n", "['OvalEdge']\n", "['ForceManager']\n", "['Remind']\n", "['LoadUp', 'Technologies,', 'LLC']\n", "['Zenjob', 'GmbH']\n", "['AgencyBloc']\n", "['Serge']\n", "['English', 'for', 'Asia']\n", "['uConnect']\n", "['Plan', 'To', 'Protect']\n", "['Bonfire', 'Interactive', 'Ltd.']\n", "['RogueHire,', 'LLC']\n", "['Total', 'Dealer', 'Compliance']\n", "['SalesForce,', 'Inc.']\n", "['STT', 'Security', '&', 'Investigative', 'Services']\n", "['Encore', 'Healthcare']\n", "['Engel', '&', 'Völkers', 'Americas,', 'Inc']\n", "['Herrmann']\n", "['Cayuse']\n", "['Ursa', 'Major', 'Technologies']\n", "['The', 'Federal', 'Liberal', 'Agency', 'of', 'Canada']\n", "['Company', 'Nurse']\n", "['WeTravel']\n", "['MarketScale']\n", "['Mizuno', 'Usa,', 'Inc.']\n", "['At', 'World', 'Properties,', 'LLC']\n", "['Talener']\n", "['Practo', 'PTE', 'Ltd.']\n", "['Prenda']\n", "['Arrcus']\n", "['Yondr', 'USA', 'LLC']\n", "['Scrivas']\n", "['AutoServe1']\n", "['InvestisDigital']\n", "['Eldersource']\n", "['Dialog']\n", "['Own', 'Solutions', 'Financial', 'Services', 'Limited']\n", "['Crayon']\n", "['Slope', 'Software']\n", "['Rokt', 'Pte', 'Ltd']\n", "['CyberCube']\n", "['Everlaw']\n", "['Compass', 'RE']\n", "['Solo']\n", "['Dolly', 'Inc.']\n", "['LTK']\n", "['Access', 'E-Forms']\n", "['Training', 'Concepts']\n", "['Luma', 'Health,', 'Inc.']\n", "['FRSecure']\n", "['MedicalDirector']\n", "nan\n" ] } ], "source": [ "arr.index.astype(str)\n", "words = arr.index.str.split()\n", "for word in words:\n", " print(word)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "354\n", "509\n", "nan\n", "\n" ] } ], "source": [ "acct_list = []\n", "acct_list = arr.index.to_list()\n", "acct_list2 = pages.index.to_list()\n", "print(len(acct_list))\n", "print(len(acct_list2))\n", "\n", "for item2 in acct_list2:\n", " acct_list.append(item2)\n", "\n", "for item in acct_list:\n", " if type(item) == float:\n", " acct_list.remove(item)\n", " item = str(item)\n", " print(item)\n", " print(type(item))\n", " \n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SilverCar Silvercar\n", "Landis LandisU\n", "AIIM AIIM+\n", "DesktopShipper Desktop Shipper\n", "TripleSeat Tripleseat\n", "InvestisDigital Investis Digital\n", "Bolt Volt\n", "Landis LandisU\n", "Ocean Oceana\n", "Skuad Skool Skuid Skool\n", "10\n" ] } ], "source": [ "tup_list = []\n", "for (name1, name2,) in itertools.combinations(acct_list, 2):\n", " distance = lev(name1, name2)\n", " if distance > 0 and distance < 2:\n", " print(name1, name2)\n", " tup_list.append((name1, name2))\n", "\n", "print(len(tup_list))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "jupyter", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.1" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }