import pandas as pd
import numpy as np 

tracks = pd.read_csv('tracks.csv', encoding='utf-8')
tracks['Days in top 50'] = tracks['Days in top 50'].astype(int)
tracks['Max Rank'] = tracks['Max Rank'].astype(int)
tracks

artists = pd.read_csv('artists.csv')
artists['lat'] = artists['lat'].astype(float)
artists['long'] = artists['long'].astype(float)
artists['age'] = artists['age'].astype(int)

artists

# sometimes we have one track multiple times

track_counts = tracks['Track ID'].value_counts()
track_counts

Track ID
3w0w2T288dec0mgeZZqoNN    2
6J3Yay84inLJ6b37lpaHFi    2
61wsDs3Dbb11h1m2tw9eMZ    2
649kk5N2PY67N2TkTBOagZ    2
4K1Pg0FLno1ltzX3jeqT83    2
                         ..
7lxOjQyVsHQ8toQeT7IgCc    1
1Is8hGpkGMiePASAxBluxM    1
0SKbj3NTkGFS76OkFmAw0u    1
1jadydWtmjFcdlGOi0G6ci    1
3FQCJI2t5LTbsRPfYVBSVB    1
Name: count, Length: 2555, dtype: int64

tracks[tracks['Track ID'] == '649kk5N2PY67N2TkTBOagZ']

tracks[tracks['Track ID'] == '4K1Pg0FLno1ltzX3jeqT83']

def unique_artists(s):
    return ', '.join(list(set(s.to_list())))

duplicate_tracks = track_counts[track_counts > 1].reset_index()
duplicate_tracks_details = duplicate_tracks.merge(tracks, left_on='Track ID', right_on='Track ID')
artist_grouped = duplicate_tracks_details[['Track ID','Artists','Days in top 50', 'Max Rank']].groupby('Track ID').agg({
    'Artists': unique_artists,
    'Days in top 50': 'sum', 
    'Max Rank': 'min'       
}).reset_index()
artist_grouped

duplicate_track_filter = ~tracks['Track ID'].isin(artist_grouped['Track ID'])
# remove duplicates
tracks = tracks[duplicate_track_filter]
# union the data back together
tracks = pd.concat([tracks,artist_grouped], axis=0).reset_index(drop=True)
tracks

np.where(tracks.value_counts('Track ID') > 1)

(array([], dtype=int64),)

artists.isna().any()

name    False
lat     False
long    False
age     False
dtype: bool

tracks.isna().any()

Track ID          False
Artists           False
Days in top 50    False
Max Rank          False
dtype: bool

valid_coordinates_filter = (artists['lat'] >= -90) & (artists['lat'] <= 90) & (artists['long'] >= -180) & (artists['long'] <= 180)
artists = artists[valid_coordinates_filter].reset_index(drop=True)
artists

artist_counts = artists['name'].value_counts()
artist_counts

name
The Weeknd            75
JAY-Z                 47
Swizz Beatz           10
EKKSTACY               8
Gracie Abrams          5
                      ..
Noah Cyrus             1
Chip                   1
Rod Wave               1
Kid Ink                1
Kardinal Offishall     1
Name: count, Length: 592, dtype: int64

artists[artists['name'] == 'The Weeknd']

dup_names = artist_counts[artist_counts > 1].reset_index()
duplicate_names_details = dup_names.merge(artists, left_on='name', right_on='name')
duplicate_names_details

clean_dup_names = duplicate_names_details.groupby('name').first()[['lat','long','age']].reset_index()
clean_dup_names

artists = artists[~artists['name'].isin(clean_dup_names['name'])]
artists = pd.concat([artists,clean_dup_names], axis=0)
artists.name.value_counts()

name
YNW Melly            1
YFN Lucci            1
XXXTENTACION         1
Wizkid               1
Wiz Khalifa          1
                    ..
Bebe Rexha           1
Sfera Ebbasta        1
NGHTMRE              1
Machine Gun Kelly    1
Lorde                1
Name: count, Length: 592, dtype: int64

artists['age'].value_counts().sort_index().plot.bar()

<Axes: xlabel='age'>

(np.abs(artists['age'] -29) > 5).value_counts(normalize=True).rename({True:'Close to me in age',False:'Not close in age'}) * 100

age
Not close in age      54.898649
Close to me in age    45.101351
Name: proportion, dtype: float64

artists['age'].plot.hist(bins=30)

<Axes: ylabel='Frequency'>

artists['log_age'] = np.log1p(artists['age'])
artists['log_age'].plot.hist(bins=30)

<Axes: ylabel='Frequency'>

# scale the data
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(artists[['lat', 'long', 'log_age']])
artists_scaled = artists.copy()
artist_features_scaled = scaler.transform(artists_scaled[['lat', 'long', 'log_age']])
artist_features_scaled = pd.DataFrame(artist_features_scaled, columns = [['lat', 'long', 'log_age']])
artist_features_scaled

from sklearn.cluster import KMeans

def make_clusters(df, max_clusters):
    res = list()
    for i in range(1,max_clusters+1):
        print(i)
        model = KMeans(n_clusters=i, n_init=4, random_state=42)
        model.fit(df)
        res.append((i,model.inertia_))
    return res

clusters = make_clusters(artist_features_scaled[['lat','long', 'log_age']], 10)
clusters

[(1, 1776.0000000000002),
 (2, 1243.0138329983556),
 (3, 899.855773023381),
 (4, 592.971220100517),
 (5, 503.32247649022355),
 (6, 423.42722440280136),
 (7, 371.1567761071731),
 (8, 345.8383779908504),
 (9, 296.2264013167676),
 (10, 264.02339769470257)]

import matplotlib.pyplot as plt
k_range = [item[0] for item in clusters]
inertias = [item[1] for item in clusters]
plt.plot(k_range, inertias, marker='o')
plt.show

<function matplotlib.pyplot.show(close=None, block=None)>

import numpy as np

model = KMeans(n_clusters=4, n_init=4, random_state=42)
model.fit(artist_features_scaled[['lat','long', 'log_age']])

artists['cluster'] = model.labels_

for cluster in np.unique(model.labels_):
    _df = artists[artists['cluster'] == cluster]
    display(_df)
    display(_df.describe())

my_data = pd.DataFrame([[41.83, -87.62, np.log1p(29)]], columns=artists_scaled[['lat','long','log_age']].columns)

my_transformed_data = pd.DataFrame(scaler.transform(my_data), columns = artists_scaled[['lat','long','log_age']].columns)
my_cluster_num = model.predict(my_transformed_data)[0]
my_cluster_num

C:\Users\fritz\OneDrive\cse6040ec\env\Lib\site-packages\sklearn\utils\validation.py:2732: UserWarning: X has feature names, but KMeans was fitted without feature names
  warnings.warn(

np.int32(0)

similar_artists = artists[artists['cluster'] == my_cluster_num].name.sort_values().to_list()
similar_artists

['$NOT',
 '24hrs',
 '24kGoldn',
 '42 Dugg',
 '6LACK',
 '6ix9ine',
 '93PUNX',
 '9lokknine',
 'A Boogie Wit da Hoodie',
 'A$AP Ferg',
 'A$AP Rocky',
 'ATL Jacob',
 'Aaron May',
 'Addison Rae',
 'Alex Goot',
 'Allan Kingdom',
 'Almighty Jay',
 'Amaarae',
 'Aminé',
 'Ansel Elgort',
 'Anuel AA',
 'Ari Lennox',
 'Ariana Grande',
 'Arizona Zervas',
 'Armani White',
 'Ashnikko',
 'Asian Doll',
 'AzChike',
 'B.o.B',
 'BLP KOSHER',
 'Baby Keem',
 'Baby Tate',
 'Bad Bunny',
 'Bailey Zimmerman',
 'Bankrol Hayden',
 'Banks',
 'Bazzi',
 'Bebe Rexha',
 'Benson Boone',
 'Bfb Da Packman',
 'Bhad Bhabie',
 'Big Sean',
 'BigXthaPlug',
 'Billie Eilish',
 'Billy Strings',
 'BlocBoy JB',
 'Blood Orange',
 'Blueface',
 'Bobby Raps',
 'Brendon Urie',
 'Brent Faiyaz',
 'Bryce Vine',
 'Bryson Tiller',
 'Calboy',
 'Camila Cabello',
 'Cardi B',
 'Carly Pearce',
 'Cash Cobain',
 'Caskey',
 'Chance the Rapper',
 'Chappell Roan',
 'Chief Keef',
 'Chris Brown',
 'Clairo',
 'Coi Leray',
 'Comethazine',
 'Cordae',
 'Corey Kent',
 'Cousin Stizz',
 'DC The Don',
 'DDG',
 'DaBaby',
 'DaniLeigh',
 'Daniel Caesar',
 'Davido',
 'Dax',
 'Daya',
 'Demi Lovato',
 'Desiigner',
 'Devin Dawson',
 'Djo',
 'Doe Boy',
 'Doechii',
 'Doja Cat',
 'Dominic Fike',
 'Don Toliver',
 'Drakeo the Ruler',
 'Dro Kenji',
 'Dylan Scott',
 'EKKSTACY',
 'EST Gee',
 'Earl Sweatshirt',
 'Elley Duhé',
 'FINNEAS',
 'Famous Dex',
 'Fivio Foreign',
 'Flipp Dinero',
 'Flo Milli',
 'Frank Ocean',
 'Fredo Bang',
 'G Herbo',
 'G-Eazy',
 'Giveon',
 'GloRilla',
 'GoldLink',
 'Gunna',
 'Harry Mack',
 'Hit-Boy',
 'Hitta J3',
 'Hoodie Allen',
 'Hotboii',
 'Hunxho',
 'ILLENIUM',
 'Isabel LaRosa',
 'JID',
 'JT',
 'Jack Harlow',
 'Jaden',
 'Jason Derulo',
 'Jay Critch',
 'Jeremih',
 'Jessie Murph',
 'Jessie Reyez',
 'JoJo Siwa',
 'Joey Bada$$',
 'John Summit',
 'Jon Bellion',
 'Joyner Lucas',
 'Juice WRLD',
 'Junior H',
 'Justin Bieber',
 'KA$HDAMI',
 'KAYTRANADA',
 'KYLE',
 'Kacey Musgraves',
 'Kacy Hill',
 'Kali Uchis',
 'Kane Brown',
 'Kehlani',
 'Kelsea Ballerini',
 'Ken Carson',
 'Kendrick Lamar',
 'Kenny Beats',
 'Kesha',
 'Kevin Abstract',
 'Key Glock',
 'Kiiara',
 'King Von',
 'Kodak Black',
 'Kodie Shane',
 'Koe Wetzel',
 'LUCKI',
 'Lainey Wilson',
 'Latto',
 'Lauren Alaina',
 'Lauren Daigle',
 'Lauren Jauregui',
 'Lay Bankz',
 'Lil Baby',
 'Lil Durk',
 'Lil Gnar',
 'Lil Gotit',
 'Lil Keed',
 'Lil Mosey',
 'Lil Nas X',
 'Lil Peep',
 'Lil Pump',
 'Lil Skies',
 'Lil Tecca',
 'Lil Tjay',
 'Lil Uzi Vert',
 'Lil Xan',
 'Lil Yachty',
 'Logic',
 'Luh Kel',
 'Luke Combs',
 'Lute',
 'Lyrical Lemonade',
 'MGK',
 'MO3',
 'Mac Miller',
 'Machine Gun Kelly',
 'MadeinTYO',
 'Madison Beer',
 'Maggie Lindemann',
 'Maggie Rogers',
 'Maluma',
 'Manuel Turizo',
 'Maren Morris',
 'Mariah the Scientist',
 'Mario Judah',
 'Mark Ambor',
 'Marshmello',
 'Masego',
 'Maxo Kream',
 'Meek Mill',
 'Megan Thee Stallion',
 'Melanie Martinez',
 'Metro Boomin',
 'Mike Dimes',
 'Mike Posner',
 'Mike WiLL Made-It',
 'Miley Cyrus',
 'Moneybagg Yo',
 'Morgan Wallen',
 'Mr.Kitty',
 'Muni Long',
 'Murda Beatz',
 'NAV',
 'NF',
 'NGHTMRE',
 'NLE Choppa',
 'Naomi Wild',
 'Nebu Kiniza',
 'Nicky Youre',
 'NoCap',
 'Noah Cyrus',
 'Noah Kahan',
 'OG Parker',
 'Offset',
 'Oliver Anthony Music',
 'Oliver Tree',
 'Ozuna',
 'PARTYNEXTDOOR',
 'Peso Pluma',
 'Playboi Carti',
 'PnB Rock',
 'Polo G',
 'Pooh Shiesty',
 'Pop Smoke',
 'Popcaan',
 'Post Malone',
 'Pressa',
 'Princess Nokia',
 'Quavo',
 'R.LUM.R',
 'ROZES',
 'Ravyn Lenae',
 'Remi Wolf',
 'Rich Amiri',
 'Rich Homie Quan',
 'Rich The Kid',
 'Rico Nasty',
 'Rihanna',
 'Rob $tone',
 'Rob49',
 'Rod Wave',
 'Roddy Ricch',
 'Roscoe Dash',
 'Roy Woods',
 'Rylo Rodriguez',
 'SZA',
 'Sabrina Carpenter',
 'Sabrina Claudio',
 'Sada Baby',
 'Saweetie',
 'Scorey',
 'Sean Kingston',
 'Selena Gomez',
 'Sexyy Red',
 'Shaboozey',
 'Shy Glizzy',
 'Sierra Ferrell',
 'Sirah',
 'Ski Mask The Slump God',
 'Skip Marley',
 'Skizzy Mars',
 'Skrillex',
 'Sleepy Hallow',
 'Smokepurpp',
 'SoFaygo',
 'Speaker Knockerz',
 'SpotemGottem',
 'Stephen Swartz',
 'Steve Lacy',
 'Stunna Gambino',
 'Sueco',
 'Summer Walker',
 'Superstar Pride',
 'Swae Lee',
 'Swizz Beatz',
 'Takeoff',
 'Tana Mongeau',
 'Tate McRae',
 'Tay-K',
 'Tayla Parx',
 'Taylor Swift',
 'Tee Grizzley',
 'Teezo Touchdown',
 'That Mexican OT',
 'The Weeknd',
 'Theophilus London',
 'Thouxanbanfauni',
 'Tommy Richman',
 'Toosii',
 'Tori Kelly',
 'Tory Lanez',
 'Travis Scott',
 'Trippie Redd',
 'Tyga',
 'Tyla Yaweh',
 'Tyler',
 'Tyler Childers',
 'Victoria Monét',
 'Vince Staples',
 'WILLOW',
 'Whethan',
 'Wiz Khalifa',
 'XXXTENTACION',
 'Y2K',
 'YBN Nahmir',
 'YFN Lucci',
 'YG',
 'YNW BSlime',
 'YNW Melly',
 'Yeat',
 'Young M.A',
 'Young Thug',
 'YoungBoy Never Broke Again',
 'Yung Bans',
 'Yung Miami',
 'Zacari',
 'benny blanco',
 'blackbear',
 'charlieonnafriday',
 'd4vd',
 'lil aaron',
 'mgk',
 'midwxst',
 'tana']

my_cluster_artists = artists['cluster'] == my_cluster_num
my_cluster = artists[my_cluster_artists].drop(columns='log_age')
my_cluster.describe()

plt.scatter(my_cluster['lat'], my_cluster['long'])
plt.scatter([41.83], [-87.62], color='red') 
plt.show()

def calculate_centroid(points):
    x_coords = points['lat']
    y_coords = points['long']
    
    cx = sum(x_coords) / len(points)
    cy = sum(y_coords) / len(points)
    return (cx, cy)
    
calculate_centroid(my_cluster[['lat', 'long']])

(35.845709595959626, -89.15983417508411)

cluster_centroids= artists[['cluster','lat','long']].groupby('cluster', group_keys=True)[['lat','long']].apply(calculate_centroid).reset_index().rename(columns={0:'centroids'})
cluster_centroids['lat_center'], cluster_centroids['long_center'] = zip(*cluster_centroids['centroids'])
cluster_centroids = cluster_centroids.drop(columns='centroids')
"""
nearest major city
0 - Kassel, Germany
1 - Indian Ocean
2 - Western France
3 - Nashville, Tenessee
4 - Memphis, Tenessee
"""
cluster_centroids

tracks['name'] = tracks['Artists'].str.split(',')
tracks_expanded = tracks.explode('name').drop(columns=['Artists'])
tracks_expanded

# to avoid duplication I'm going to group this df by name and count the unique tracks

artist_track_summary = tracks_expanded.groupby('name').agg({
    'Track ID': pd.Series.nunique,
    'Days in top 50':'sum',
    'Max Rank':['mean', 'sum']
}).reset_index()
artist_track_summary.columns = [f'{k} - {v}' for k,v in zip(artist_track_summary.columns.get_level_values(0), artist_track_summary.columns.get_level_values(1))]
artist_track_summary = artist_track_summary.rename(columns={
    'name - ':'name'  
})

artist_track_summary['Max Rank - mean'] = artist_track_summary['Max Rank - mean'].apply(round)
artist_track_summary

song_artists = artists.merge(artist_track_summary, left_on='name', right_on='name')
song_artists

song_artist_summary = song_artists.groupby('cluster').agg({
    'name': 'nunique',
    'age': 'mean',
    'Track ID - nunique': 'sum',
    'Days in top 50 - sum':'sum',
    'Max Rank - sum':'sum'
}).reset_index()

song_artist_summary= song_artist_summary.rename(columns={
    'name': 'Artist Count',
    'age': 'Avg Age',
    'Track ID - nunique': 'Unique Tracks',
    'Days in top 50 - sum':'Total Days in Top 50',
    'Max Rank - sum':'Total Max Rank'
})
song_artist_summary

from sklearn.preprocessing import MinMaxScaler

features_scaled = song_artist_summary.copy()
features_scaled[['Artist Count', 'Avg Age', 'Unique Tracks', 'Total Days in Top 50', 'Total Max Rank']] = MinMaxScaler().fit_transform(features_scaled[['Artist Count', 'Avg Age', 'Unique Tracks', 'Total Days in Top 50', 'Total Max Rank']])
features_scaled

song_artist_summary = song_artist_summary.merge(cluster_centroids, left_on='cluster', right_on='cluster')
song_artist_summary['Score'] = features_scaled['Unique Tracks'] + features_scaled['Total Days in Top 50'] - features_scaled['Total Max Rank']
song_artist_summary['Cluster Rank'] = song_artist_summary['Score'].rank(axis=0, ascending=False).astype(int)
song_artist_summary['is My Cluster'] = song_artist_summary['cluster'] == my_cluster_num
song_artist_summary.merge(cluster_centroids,left_on='cluster', right_on='cluster')
song_artist_summary[['cluster','Avg Age', 'Artist Count', 'Unique Tracks', 'Total Days in Top 50', 'Total Max Rank', 'lat_center', 'long_center', 'Cluster Rank','is My Cluster']].sort_values('Cluster Rank')

artists[artists['cluster'] == my_cluster_num]

artists[artists['cluster'] == my_cluster_num].describe()

	Track ID	Artists	Days in top 50	Max Rank
0	29cluXfG8A0sXDjUHNY8fv	David Burns	111	1
1	2ut4BOQSqxLpcX5MtPjzYa	Drake, J. Cole	98	2
2	1MVqeIAwhD4T44AKVkIfic	Fred again.., Baby Keem	33	2
3	21vc2kQZMS00cAyNT82a1M	¥$, Kanye West, Ty Dolla $ign, Bump J	79	4
4	6ie2Bw3xLj2JcGowOlcMhb	Lorde	22	5
...	...	...	...	...
2555	5byWPNSh2hi0ULmDxBgLyV	Coi Leray, Pooh Shiesty	2	186
2556	5vGtrfeMjb03aDHGDaIakG	blink-182	2	190
2557	6435Ra0NWQzPyZAcd1ojWI	Kylie Minogue	2	191
2558	6Ntz1uQoMF8L2E4w518BFp	Caravan Palace	2	195
2559	6bnF93Rx87YqUBLSgjiMU8	The Weeknd	2	200

	name	lat	long	age
0	Lil Uzi Vert	39.952778	-75.163611	28
1	Nicki Minaj	10.666667	-61.516667	41
2	Juice WRLD	41.881944	-87.627778	25
3	Drake	43.741667	-79.373333	37
4	J. Cole	50.110556	8.682222	38
...	...	...	...	...
1194	Kodie Shane	33.748889	-84.390000	26
1195	Flipp Dinero	50.666667	-88.033333	29
1196	Pharrell Williams	36.850000	-75.977778	51
1197	Young Nudy	1266.350000	-659.766667	32
1198	Pooh Shiesty	35.117500	-89.971111	25

	Track ID	Artists	Days in top 50	Max Rank
0	29cluXfG8A0sXDjUHNY8fv	David Burns	111	1
1	2ut4BOQSqxLpcX5MtPjzYa	Drake, J. Cole	98	2
2	1MVqeIAwhD4T44AKVkIfic	Fred again.., Baby Keem	33	2
3	21vc2kQZMS00cAyNT82a1M	¥$, Kanye West, Ty Dolla $ign, Bump J	79	4
4	6ie2Bw3xLj2JcGowOlcMhb	Lorde	22	5
...	...	...	...	...
2550	3w0w2T288dec0mgeZZqoNN	¥$, Kanye West, Ty Dolla $ign, Rich The Kid, P...	114	5
2551	4K1Pg0FLno1ltzX3jeqT83	Kendrick Lamar, Wallie the Sensei, Roddy Ricch...	29	24
2552	61wsDs3Dbb11h1m2tw9eMZ	Childish Gambino, Ludwig Göransson, Childish G...	26	93
2553	649kk5N2PY67N2TkTBOagZ	Powfu, Daniel Saint, Powfu, Daniel Saint, Chil...	25	39
2554	6J3Yay84inLJ6b37lpaHFi	mgk, Naomi Wild, Machine Gun Kelly, Naomi Wild	27	16

	name	lat	long	age
0	Lil Uzi Vert	39.952778	-75.163611	28
1	Nicki Minaj	10.666667	-61.516667	41
2	Juice WRLD	41.881944	-87.627778	25
3	Drake	43.741667	-79.373333	37
4	J. Cole	50.110556	8.682222	38
...	...	...	...	...
1078	DaniLeigh	25.766667	-80.200000	30
1079	Kodie Shane	33.748889	-84.390000	26
1080	Flipp Dinero	50.666667	-88.033333	29
1081	Pharrell Williams	36.850000	-75.977778	51
1082	Pooh Shiesty	35.117500	-89.971111	25

	name	lat	long	age
91	The Weeknd	43.741667	-79.373333	33
193	The Weeknd	43.741667	-79.373333	34
442	The Weeknd	54.250000	-87.766667	20
448	The Weeknd	39.758056	-94.836667	51
452	The Weeknd	40.712778	-74.006111	38
...	...	...	...	...
840	The Weeknd	38.347222	-81.633333	36
844	The Weeknd	51.800000	-0.200000	25
845	The Weeknd	40.780000	-73.479444	35
859	The Weeknd	38.643611	-77.260833	29
861	The Weeknd	40.279722	-86.505833	30

Introduction¶

Tracks¶

Artists¶

Data Cleaning¶

Do I listen to artists that are similar to me more than artists that arent?¶

	Track ID	Artists	Days in top 50	Max Rank
2275	649kk5N2PY67N2TkTBOagZ	Powfu, Daniel Saint	4	84
2315	649kk5N2PY67N2TkTBOagZ	Powfu, Daniel Saint, Chill Sebs	21	39

	Track ID	Artists	Days in top 50	Max Rank
2353	4K1Pg0FLno1ltzX3jeqT83	Kendrick Lamar, Wallie the Sensei, Roddy Ricch...	1	126
2358	4K1Pg0FLno1ltzX3jeqT83	Kendrick Lamar, Wallie the Sensei, Siete7x, Ro...	28	24

	name	lat	long	age
0	2 Chainz	33.617500	-84.467500	46
1	24kGoldn	37.777500	-122.416389	23
2	42 Dugg	42.331389	-83.045833	29
3	6LACK	39.289444	-76.615278	31
4	6ix9ine	40.712778	-74.006111	27
...	...	...	...	...
281	benny blanco	38.954444	-77.346389	35
282	charlieonnafriday	47.609722	-122.333056	21
283	d4vd	40.750000	-73.866667	18
284	lil aaron	41.581944	-85.836667	29
285	tana	32.492222	-84.940278	17

	lat	long	log_age
0	-5.086067	5.015286	-0.690032
1	-0.420283	-0.511777	-0.049256
2	0.370619	-0.064817	-0.049256
3	0.684496	1.628808	-0.249336
4	0.346801	-0.074377	0.046412
...	...	...	...
587	0.223624	-0.142723	0.139385
588	0.829954	-1.063208	-1.485943
589	0.349408	-0.071524	-1.969781
590	0.407689	-0.316445	-0.462334
591	-0.229076	-0.298104	-2.148220

	name	lat	long	age	log_age	cluster
38	Machine Gun Kelly	29.762778	-95.383056	33	3.526361	0
41	NGHTMRE	41.052778	-73.538889	33	3.526361	0
67	Bebe Rexha	40.712778	-74.006111	34	3.555348	0
83	Brendon Urie	37.104167	-113.584167	36	3.610918	0
93	PnB Rock	39.952778	-75.163611	32	3.496508	0
...	...	...	...	...	...	...
281	benny blanco	38.954444	-77.346389	35	3.583519	0
282	charlieonnafriday	47.609722	-122.333056	21	3.091042	0
283	d4vd	40.750000	-73.866667	18	2.944439	0
284	lil aaron	41.581944	-85.836667	29	3.401197	0
285	tana	32.492222	-84.940278	17	2.890372	0

	lat	long	age	log_age	cluster
count	330.000000	330.000000	330.000000	330.000000	330.0
mean	35.845710	-89.159834	28.515152	3.373833	0.0
std	6.726657	16.041305	4.288397	0.151254	0.0
min	6.230833	-157.858333	16.000000	2.833213	0.0
25%	33.465625	-95.383056	25.000000	3.258097	0.0
50%	36.132500	-84.390000	29.000000	3.401197	0.0
75%	40.750000	-77.314722	32.000000	3.496508	0.0
max	51.050000	-52.796944	36.000000	3.610918	0.0

	name	lat	long	age	log_age	cluster
46	Sfera Ebbasta	45.533333	9.233333	31	3.465736	1
87	R3HAB	51.588889	4.775833	37	3.637586	1
88	Yung Lean	59.329444	18.068611	27	3.332205	1
182	Ozzy Osbourne	59.750000	-13.333333	75	4.330733	1
184	San Holo	52.066667	4.500000	33	3.526361	1
...	...	...	...	...	...	...
218	ScHoolboy Q	50.082500	8.240000	37	3.637586	1
229	Stefflon Don	52.480000	-1.902500	32	3.496508	1
230	Stephen	48.208333	16.372500	32	3.496508	1
270	YUNGBLUD	53.522778	-1.132500	26	3.295837	1
280	Zach Bryan	26.500000	128.000000	27	3.332205	1

	lat	long	age	log_age	cluster
count	77.000000	77.000000	77.000000	77.000000	77.0
mean	51.555480	9.078582	34.896104	3.548290	1.0
std	7.268873	27.512443	10.287428	0.244114	0.0
min	24.466667	-13.933333	23.000000	3.178054	1.0
25%	50.110556	-1.133333	28.000000	3.367296	1.0
50%	51.800000	0.119167	34.000000	3.555348	1.0
75%	53.522778	8.682222	37.000000	3.637586	1.0
max	67.083333	129.091667	75.000000	4.330733	1.0

	name	lat	long	age	log_age	cluster
121	Paul McCartney	53.407500	-2.991944	81	4.406719	2
125	Katy Perry	34.416667	-119.700000	39	3.688879	2
136	DJ Drama	39.952778	-75.163611	45	3.828641	2
162	Ben Shapiro	34.050000	-118.250000	40	3.713572	2
210	Preme	45.508889	-73.554167	38	3.663562	2
...	...	...	...	...	...	...
262	Waka Flocka Flame	40.716667	-74.000000	38	3.663562	2
263	Wale	38.904722	-77.016389	39	3.688879	2
264	Watsky	37.777500	-122.416389	38	3.663562	2
272	Yelawolf	36.055000	-86.672500	44	3.806662	2
273	Yo Gotti	35.117500	-89.971111	42	3.761200	2

	lat	long	age	log_age	cluster
count	163.000000	163.000000	163.000000	163.000000	163.0
mean	35.536103	-88.135494	50.355828	3.891459	2.0
std	7.111707	18.240165	23.538432	0.266721	0.0
min	6.230833	-157.858333	37.000000	3.637586	2.0
25%	33.470972	-94.430833	40.000000	3.713572	2.0
50%	35.117500	-84.540556	45.000000	3.828641	2.0
75%	40.712778	-77.391667	52.000000	3.970292	2.0
max	53.407500	-0.583333	295.000000	5.690359	2.0

	name	lat	long	age	log_age	cluster
11	Lorde	-36.840556	174.740000	27	3.332205	3
144	Keith Urban	-35.725000	174.323611	56	4.043051	3
148	Burna Boy	4.824167	7.033611	32	3.496508	3
202	Flume	-33.867778	151.210000	32	3.496508	3
269	Ayra Starr	6.366667	2.433333	21	3.091042	3
283	LIT killah	-34.766667	-58.616667	24	3.218876	3
335	Tekno	10.315833	9.844167	33	3.526361	3
492	Cody Simpson	-28.016667	153.400000	27	3.332205	3
523	Nonô	-22.911111	-43.205556	26	3.295837	3
639	Zerb	-23.550000	-46.633333	26	3.295837	3
680	Tkay Maidza	-17.829167	31.052222	28	3.367296	3
736	Kimbra	-37.783333	175.283333	34	3.555348	3
820	Masked Wolf	-33.867778	151.210000	33	3.526361	3
855	Kylie Minogue	-37.814167	144.963056	56	4.043051	3
901	Troye Sivan	-26.204444	28.045556	29	3.401197	3
963	beabadoobee	10.716667	122.566667	24	3.218876	3
1020	Asake	6.583333	3.750000	29	3.401197	3
39	CKay	10.516667	7.433333	28	3.367296	3
70	Duki	-34.600000	-58.416667	27	3.332205	3
97	Illy	-38.133333	145.116667	37	3.637586	3
243	The Kid LAROI	-33.900278	151.207778	20	3.044522	3
266	Wizkid	6.500000	3.350000	33	3.526361	3

	lat	long	age	log_age	cluster
count	22.000000	22.000000	22.000000	22.000000	22.0
mean	-19.090316	65.004141	31.000000	3.434078	3.0
std	19.672185	87.499289	9.133924	0.246272	0.0
min	-38.133333	-58.616667	20.000000	3.044522	3.0
25%	-34.725000	3.450000	26.250000	3.304929	3.0
50%	-27.110556	29.548889	28.500000	3.384247	3.0
75%	5.981042	151.209444	33.000000	3.526361	3.0
max	10.716667	175.283333	56.000000	4.043051	3.0

	name	Track ID - nunique	Days in top 50 - sum	Max Rank - mean	Max Rank - sum
0	.Eehou	2	14	112	224
1	070 Shake	1	99	43	43
2	11:11 Music Group	1	1	174	174
3	18YOMAN	1	7	78	78
4	2 Chainz	8	195	130	1041
...	...	...	...	...	...
1437	thủy	1	22	92	92
1438	uniivrss	1	25	121	121
1439	xOHARA	1	18	60	60
1440	xander.	2	22	110	221
1441	¥$	19	689	76	1449

	cluster	Artist Count	Avg Age	Unique Tracks	Total Days in Top 50	Total Max Rank
0	0	231	28.233766	1166	35128	123185
1	1	50	34.180000	181	5165	17958
2	2	92	47.326087	429	11596	50061
3	3	13	32.461538	33	1000	3396

	cluster	Artist Count	Avg Age	Unique Tracks	Total Days in Top 50	Total Max Rank
0	0	1.000000	0.000000	1.000000	1.000000	1.000000
1	1	0.169725	0.311446	0.130627	0.122041	0.121564
2	2	0.362385	1.000000	0.349515	0.310478	0.389560
3	3	0.000000	0.221438	0.000000	0.000000	0.000000