diff --git a/BPR_Music_RecSys.ipynb b/BPR_Music_RecSys.ipynb
new file mode 100644
index 0000000..8bf356c
--- /dev/null
+++ b/BPR_Music_RecSys.ipynb
@@ -0,0 +1,1239 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "e08f25dd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import scipy.sparse as sparse\n",
+ "from scipy.sparse.linalg import spsolve\n",
+ "import random\n",
+ "\n",
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "import implicit\n",
+ "\n",
+ "# Load the data\n",
+ "raw_data = pd.read_table('data/usersha1-artmbid-artname-plays.tsv')\n",
+ "raw_data = raw_data.drop(raw_data.columns[1], axis=1)\n",
+ "raw_data.columns = ['user', 'artist', 'plays']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "52d78ae2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " die Ärzte | \n",
+ " 1099 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " melissa etheridge | \n",
+ " 897 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " elvenking | \n",
+ " 717 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " juliette & the licks | \n",
+ " 706 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " red hot chili peppers | \n",
+ " 691 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist plays\n",
+ "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099\n",
+ "1 00000c289a1829a808ac09c00daf10bc3c4e223b melissa etheridge 897\n",
+ "2 00000c289a1829a808ac09c00daf10bc3c4e223b elvenking 717\n",
+ "3 00000c289a1829a808ac09c00daf10bc3c4e223b juliette & the licks 706\n",
+ "4 00000c289a1829a808ac09c00daf10bc3c4e223b red hot chili peppers 691"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "raw_data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d2bd5fc5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "raw_data1 = raw_data[0:2000000]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "58320658",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total number of users in the data: 40913\n",
+ "Total number of artists in the data: 110820\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'Total number of users in the data: {raw_data1.user.nunique()}')\n",
+ "print(f'Total number of artists in the data: {raw_data1.artist.nunique()}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "ffcd2158",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = raw_data1.dropna()\n",
+ "data = data.copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "79cc77d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['user'] = data['user'].astype('category')\n",
+ "data['artist'] = data['artist'].astype('category')\n",
+ "data['user_id'] = data['user'].cat.codes\n",
+ "data['artist_id'] = data['artist'].cat.codes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "5912ec13",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ " user_id | \n",
+ " artist_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " die Ärzte | \n",
+ " 1099 | \n",
+ " 0 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " melissa etheridge | \n",
+ " 897 | \n",
+ " 0 | \n",
+ " 66512 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist plays \\\n",
+ "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 \n",
+ "1 00000c289a1829a808ac09c00daf10bc3c4e223b melissa etheridge 897 \n",
+ "\n",
+ " user_id artist_id \n",
+ "0 0 30264 \n",
+ "1 0 66512 "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "0c90e21b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sparse_item_user = sparse.csr_matrix((data['plays'].astype(float), (data['artist_id'], data['user_id'])))\n",
+ "sparse_user_item = sparse.csr_matrix((data['plays'].astype(float), (data['user_id'], data['artist_id'])))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "3ec92ab1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<110820x40913 sparse matrix of type '<class 'numpy.float64'>'\n",
+ "\twith 1999967 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sparse_item_user"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "165220e6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<40913x110820 sparse matrix of type '<class 'numpy.float64'>'\n",
+ "\twith 1999967 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sparse_user_item"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "a6a0a966",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "99.95588936009682"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "matrix_size = sparse_user_item.shape[0]*sparse_user_item.shape[1]\n",
+ "num_purchases = len(sparse_user_item.nonzero()[0])\n",
+ "sparsity = 100*(1 - (num_purchases / matrix_size))\n",
+ "sparsity"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "f2eb765a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import random\n",
+ "\n",
+ "def make_train(ratings, pct_test = 0.2):\n",
+ " \n",
+ " test_set = ratings.copy() # Make a copy of the original set to be the test set. \n",
+ " test_set[test_set != 0] = 1 # Store the test set as a binary preference matrix\n",
+ " \n",
+ " training_set = ratings.copy() # Make a copy of the original data we can alter as our training set. \n",
+ " \n",
+ " nonzero_inds = training_set.nonzero() # Find the indices in the ratings data where an interaction exists\n",
+ " nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1])) # Zip the row and column indices together into a list of (item, user) index pairs\n",
+ "\n",
+ " \n",
+ " random.seed(0) # Set the random seed to zero for reproducibility\n",
+ " \n",
+ " num_samples = int(np.ceil(pct_test*len(nonzero_pairs))) # Round the number of samples needed up to the next integer\n",
+ " samples = random.sample(nonzero_pairs, num_samples) # Sample a random number of item-user pairs without replacement\n",
+ "\n",
+ " item_inds = [index[0] for index in samples] # Get the item row indices\n",
+ "\n",
+ " user_inds = [index[1] for index in samples] # Get the user column indices\n",
+ "\n",
+ " \n",
+ " training_set[item_inds, user_inds] = 0 # Assign all of the randomly chosen user-item pairs to zero\n",
+ " training_set.eliminate_zeros() # Get rid of zeros in sparse array storage after update to save space\n",
+ " \n",
+ " return training_set, test_set, list(set(user_inds))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "2f0c5ec2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "product_train, product_test, product_users_altered = make_train(sparse_item_user, pct_test = 0.05)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "772579e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:root:Intel MKL BLAS detected. Its highly recommend to set the environment variable 'export MKL_NUM_THREADS=1' to disable its internal multithreading\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1578cae65f7441e4a76cf9c5cad9092b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/40 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=40)\n",
+ "\n",
+ "# Calculate the confidence by multiplying the training matrix by our alpha value (the alpha value corresponds to the \n",
+ "# confidence metric that we discussed earlier).\n",
+ "\n",
+ "alpha_val = 15\n",
+ "data_conf = (product_train * alpha_val).astype('double')\n",
+ "\n",
+ "# We have used an alpha_val of 15 after performing some iterations with different alpha values\n",
+ "#Fit the model\n",
+ "model.fit(data_conf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "6d849627",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "item_vecs = model.item_factors\n",
+ "user_vecs = model.user_factors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "ade4155c",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Shape of the artist vector matrix: (110820, 20)\n",
+ "Shape of the user vector matrix: (40913, 20)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f'Shape of the artist vector matrix: {item_vecs.shape}')\n",
+ "print(f'Shape of the user vector matrix: {user_vecs.shape}')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "71645861",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn import metrics\n",
+ "import matplotlib.pylab as plt\n",
+ "def auc_score(predictions, test):\n",
+ " '''\n",
+ " This simple function will output the area under the curve using sklearn's metrics. \n",
+ " \n",
+ " parameters:\n",
+ " \n",
+ " - predictions: your prediction output\n",
+ " \n",
+ " - test: the actual target result you are comparing to\n",
+ " \n",
+ " returns:\n",
+ " \n",
+ " - AUC (area under the Receiver Operating Characteristic curve)\n",
+ " '''\n",
+ " fpr, tpr, thresholds = metrics.roc_curve(test, predictions)\n",
+ " return metrics.auc(fpr, tpr)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "df97a4b8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def calc_mean_auc(training_set, altered_users, predictions, test_set):\n",
+ " \n",
+ " store_auc = [] # An empty list to store the AUC for each user that had an item removed from the training set\n",
+ " popularity_auc = [] # To store popular AUC scores\n",
+ " pop_items = np.array(test_set.sum(axis = 1)).reshape(-1) # Get sum of item interactions to find most popular\n",
+ " item_vecs = predictions[1]\n",
+ " for user in altered_users: # Iterate through each user that had an item altered\n",
+ " training_column = training_set[:,user].toarray().reshape(-1) # Get the training set column\n",
+ " zero_inds = np.where(training_column == 0) # Find where the interaction had not yet occurred\n",
+ " \n",
+ " # Get the predicted values based on our user/item vectors\n",
+ " user_vec = predictions[0][user,:]\n",
+ " pred = user_vec.dot(item_vecs).toarray()[0,zero_inds].reshape(-1)\n",
+ " \n",
+ " # Get only the items that were originally zero\n",
+ " # Select all ratings from the MF prediction for this user that originally had no interaction\n",
+ " actual = test_set[:,user].toarray()[zero_inds,0].reshape(-1)\n",
+ " \n",
+ " # Select the binarized yes/no interaction pairs from the original full data\n",
+ " # that align with the same pairs in training \n",
+ " pop = pop_items[zero_inds] # Get the item popularity for our chosen items\n",
+ " \n",
+ " store_auc.append(auc_score(pred, actual)) # Calculate AUC for the given user and store\n",
+ " \n",
+ " popularity_auc.append(auc_score(pop, actual)) # Calculate AUC using most popular and store\n",
+ " # End users iteration\n",
+ " \n",
+ " return float('%.3f'%np.mean(store_auc)), float('%.3f'%np.mean(popularity_auc))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "0532d2c4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(0.962, 0.934)"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "calc_mean_auc(product_train, product_users_altered,\n",
+ " [sparse.csr_matrix(user_vecs), sparse.csr_matrix(item_vecs.T)], product_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "c6fd8a4c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ " user_id | \n",
+ " artist_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " red hot chili peppers | \n",
+ " 691 | \n",
+ " 0 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 1422 | \n",
+ " 000429493d9716b66b02180d208d09b5b89fbe64 | \n",
+ " red hot chili peppers | \n",
+ " 234 | \n",
+ " 29 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 2139 | \n",
+ " 0007e26aafcfc0b6dcb87d7041583fbb7cced88a | \n",
+ " red hot chili peppers | \n",
+ " 159 | \n",
+ " 44 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 3284 | \n",
+ " 000b0bb32f149504e1df3cce85b6bfd20cef3dd0 | \n",
+ " red hot chili peppers | \n",
+ " 46 | \n",
+ " 68 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 3322 | \n",
+ " 000b2ee840cbda56e0f41c8f248c4fb7ee275db3 | \n",
+ " red hot chili peppers | \n",
+ " 87 | \n",
+ " 69 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 1998983 | \n",
+ " 1d2c0053f7e585caa02ccbb3a96b708c078e9300 | \n",
+ " red hot chili peppers | \n",
+ " 1024 | \n",
+ " 40892 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 1999418 | \n",
+ " 1d2d697dbe47814d1de4064508b32ec00a1ec767 | \n",
+ " red hot chili peppers | \n",
+ " 384 | \n",
+ " 40901 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 1999642 | \n",
+ " 1d2db9f945283c3f48ab685a84a78429a63a3f19 | \n",
+ " red hot chili peppers | \n",
+ " 66 | \n",
+ " 40905 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 1999823 | \n",
+ " 1d2e1fa030318413f194f741c674b6032094a0ce | \n",
+ " red hot chili peppers | \n",
+ " 389 | \n",
+ " 40909 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ " 1999889 | \n",
+ " 1d2e583e63cbd9cfc526b53f90f4fd438271336e | \n",
+ " red hot chili peppers | \n",
+ " 72 | \n",
+ " 40910 | \n",
+ " 80876 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5693 rows × 5 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist \\\n",
+ "4 00000c289a1829a808ac09c00daf10bc3c4e223b red hot chili peppers \n",
+ "1422 000429493d9716b66b02180d208d09b5b89fbe64 red hot chili peppers \n",
+ "2139 0007e26aafcfc0b6dcb87d7041583fbb7cced88a red hot chili peppers \n",
+ "3284 000b0bb32f149504e1df3cce85b6bfd20cef3dd0 red hot chili peppers \n",
+ "3322 000b2ee840cbda56e0f41c8f248c4fb7ee275db3 red hot chili peppers \n",
+ "... ... ... \n",
+ "1998983 1d2c0053f7e585caa02ccbb3a96b708c078e9300 red hot chili peppers \n",
+ "1999418 1d2d697dbe47814d1de4064508b32ec00a1ec767 red hot chili peppers \n",
+ "1999642 1d2db9f945283c3f48ab685a84a78429a63a3f19 red hot chili peppers \n",
+ "1999823 1d2e1fa030318413f194f741c674b6032094a0ce red hot chili peppers \n",
+ "1999889 1d2e583e63cbd9cfc526b53f90f4fd438271336e red hot chili peppers \n",
+ "\n",
+ " plays user_id artist_id \n",
+ "4 691 0 80876 \n",
+ "1422 234 29 80876 \n",
+ "2139 159 44 80876 \n",
+ "3284 46 68 80876 \n",
+ "3322 87 69 80876 \n",
+ "... ... ... ... \n",
+ "1998983 1024 40892 80876 \n",
+ "1999418 384 40901 80876 \n",
+ "1999642 66 40905 80876 \n",
+ "1999823 389 40909 80876 \n",
+ "1999889 72 40910 80876 \n",
+ "\n",
+ "[5693 rows x 5 columns]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[data['artist'] == 'red hot chili peppers']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "2e4294bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "red hot chili peppers\n",
+ "muse\n",
+ "nirvana\n",
+ "coldplay\n",
+ "queen\n",
+ "the killers\n",
+ "foo fighters\n",
+ "placebo\n",
+ "pink floyd\n",
+ "the beatles\n"
+ ]
+ }
+ ],
+ "source": [
+ "artist_id = 80876\n",
+ "n_similar = 10 # getting the top ten similar items\n",
+ "\n",
+ "# Use implicit to get similar items.\n",
+ "similar = model.similar_items(artist_id, n_similar)\n",
+ "# Print the names of our most similar artists\n",
+ "for artist in similar:\n",
+ " idx, score = artist\n",
+ " print (data.artist.loc[data.artist_id == idx].iloc[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "0d37cf46",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ " user_id | \n",
+ " artist_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " die Ärzte | \n",
+ " 1099 | \n",
+ " 0 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ " 2943 | \n",
+ " 000a1585c5f65532a9c9187a882892982d345a5c | \n",
+ " die Ärzte | \n",
+ " 148 | \n",
+ " 61 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ " 3787 | \n",
+ " 000cb6427411006fe9a6193d3c4f59efed53fbef | \n",
+ " die Ärzte | \n",
+ " 7 | \n",
+ " 78 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ " 6295 | \n",
+ " 0014ffc91d3a5b59cce9bceaf22ef0d72e5711b8 | \n",
+ " die Ärzte | \n",
+ " 88 | \n",
+ " 128 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ " 13513 | \n",
+ " 003059a886782e4d7936da913d3f064f637d0b2b | \n",
+ " die Ärzte | \n",
+ " 5 | \n",
+ " 274 | \n",
+ " 30264 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist plays user_id \\\n",
+ "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 0 \n",
+ "2943 000a1585c5f65532a9c9187a882892982d345a5c die Ärzte 148 61 \n",
+ "3787 000cb6427411006fe9a6193d3c4f59efed53fbef die Ärzte 7 78 \n",
+ "6295 0014ffc91d3a5b59cce9bceaf22ef0d72e5711b8 die Ärzte 88 128 \n",
+ "13513 003059a886782e4d7936da913d3f064f637d0b2b die Ärzte 5 274 \n",
+ "\n",
+ " artist_id \n",
+ "0 30264 \n",
+ "2943 30264 \n",
+ "3787 30264 \n",
+ "6295 30264 \n",
+ "13513 30264 "
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[data['artist'] == 'die Ärzte'].head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "edb749cc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "die Ärzte\n",
+ "guano apes\n",
+ "mando diao\n",
+ "soundtrack\n",
+ "apocalyptica\n",
+ "him\n",
+ "limp bizkit\n",
+ "[unknown]\n",
+ "bloodhound gang\n",
+ "nightwish\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Find the 10 most similar to die Ärzte\n",
+ "artist_id = 30264\n",
+ "n_similar = 10 # getting the top ten similar items\n",
+ "\n",
+ "# Use implicit to get similar items.\n",
+ "similar = model.similar_items(artist_id, n_similar)\n",
+ "# Print the names of our most similar artists\n",
+ "for artist in similar:\n",
+ " idx, score = artist\n",
+ " print (data.artist.loc[data.artist_id == idx].iloc[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "076e43ad",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['rank'] = data.groupby(['user_id'])['plays'].rank(ascending = False)\n",
+ "\n",
+ "# filtering for their first choice\n",
+ "data_1 = data[data['rank'] == 1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "336f236b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ " user_id | \n",
+ " artist_id | \n",
+ " rank | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00000c289a1829a808ac09c00daf10bc3c4e223b | \n",
+ " die Ärzte | \n",
+ " 1099 | \n",
+ " 0 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 25686 | \n",
+ " 005d521c9f8b1acfc13b7a4cc4b39085edfc786a | \n",
+ " die Ärzte | \n",
+ " 933 | \n",
+ " 523 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 29112 | \n",
+ " 006aaaaf386fdbb0aea3f2bf9019e346a7294b6a | \n",
+ " die Ärzte | \n",
+ " 946 | \n",
+ " 595 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 30440 | \n",
+ " 006f93b4213be13020a3819e0d0a86dcf97b58de | \n",
+ " die Ärzte | \n",
+ " 1375 | \n",
+ " 622 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 53937 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " die Ärzte | \n",
+ " 2924 | \n",
+ " 1094 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist plays user_id \\\n",
+ "0 00000c289a1829a808ac09c00daf10bc3c4e223b die Ärzte 1099 0 \n",
+ "25686 005d521c9f8b1acfc13b7a4cc4b39085edfc786a die Ärzte 933 523 \n",
+ "29112 006aaaaf386fdbb0aea3f2bf9019e346a7294b6a die Ärzte 946 595 \n",
+ "30440 006f93b4213be13020a3819e0d0a86dcf97b58de die Ärzte 1375 622 \n",
+ "53937 00c465e5b33365ab91cc5cf161590a38044954af die Ärzte 2924 1094 \n",
+ "\n",
+ " artist_id rank \n",
+ "0 30264 1.0 \n",
+ "25686 30264 1.0 \n",
+ "29112 30264 1.0 \n",
+ "30440 30264 1.0 \n",
+ "53937 30264 1.0 "
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_1[data_1['artist_id'] == 30264].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "1e957969",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " user | \n",
+ " artist | \n",
+ " plays | \n",
+ " user_id | \n",
+ " artist_id | \n",
+ " rank | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 53937 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " die Ärzte | \n",
+ " 2924 | \n",
+ " 1094 | \n",
+ " 30264 | \n",
+ " 1.0 | \n",
+ "
\n",
+ " \n",
+ " 53938 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " equilibrium | \n",
+ " 1936 | \n",
+ " 1094 | \n",
+ " 36491 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 53939 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " ensiferum | \n",
+ " 1782 | \n",
+ " 1094 | \n",
+ " 36343 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 53940 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " system of a down | \n",
+ " 1167 | \n",
+ " 1094 | \n",
+ " 92524 | \n",
+ " 4.0 | \n",
+ "
\n",
+ " \n",
+ " 53941 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " sdp | \n",
+ " 1042 | \n",
+ " 1094 | \n",
+ " 85406 | \n",
+ " 5.0 | \n",
+ "
\n",
+ " \n",
+ " 53942 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " deichkind | \n",
+ " 1013 | \n",
+ " 1094 | \n",
+ " 29016 | \n",
+ " 6.0 | \n",
+ "
\n",
+ " \n",
+ " 53943 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " knorkator | \n",
+ " 920 | \n",
+ " 1094 | \n",
+ " 57157 | \n",
+ " 7.0 | \n",
+ "
\n",
+ " \n",
+ " 53944 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " typ:t.u.r.b.o. | \n",
+ " 911 | \n",
+ " 1094 | \n",
+ " 103029 | \n",
+ " 8.0 | \n",
+ "
\n",
+ " \n",
+ " 53945 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " serj tankian | \n",
+ " 813 | \n",
+ " 1094 | \n",
+ " 85997 | \n",
+ " 9.0 | \n",
+ "
\n",
+ " \n",
+ " 53946 | \n",
+ " 00c465e5b33365ab91cc5cf161590a38044954af | \n",
+ " rise against | \n",
+ " 779 | \n",
+ " 1094 | \n",
+ " 82040 | \n",
+ " 10.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " user artist plays \\\n",
+ "53937 00c465e5b33365ab91cc5cf161590a38044954af die Ärzte 2924 \n",
+ "53938 00c465e5b33365ab91cc5cf161590a38044954af equilibrium 1936 \n",
+ "53939 00c465e5b33365ab91cc5cf161590a38044954af ensiferum 1782 \n",
+ "53940 00c465e5b33365ab91cc5cf161590a38044954af system of a down 1167 \n",
+ "53941 00c465e5b33365ab91cc5cf161590a38044954af sdp 1042 \n",
+ "53942 00c465e5b33365ab91cc5cf161590a38044954af deichkind 1013 \n",
+ "53943 00c465e5b33365ab91cc5cf161590a38044954af knorkator 920 \n",
+ "53944 00c465e5b33365ab91cc5cf161590a38044954af typ:t.u.r.b.o. 911 \n",
+ "53945 00c465e5b33365ab91cc5cf161590a38044954af serj tankian 813 \n",
+ "53946 00c465e5b33365ab91cc5cf161590a38044954af rise against 779 \n",
+ "\n",
+ " user_id artist_id rank \n",
+ "53937 1094 30264 1.0 \n",
+ "53938 1094 36491 2.0 \n",
+ "53939 1094 36343 3.0 \n",
+ "53940 1094 92524 4.0 \n",
+ "53941 1094 85406 5.0 \n",
+ "53942 1094 29016 6.0 \n",
+ "53943 1094 57157 7.0 \n",
+ "53944 1094 103029 8.0 \n",
+ "53945 1094 85997 9.0 \n",
+ "53946 1094 82040 10.0 "
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[data['user_id'] == 1094].head(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "78235cbb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " artist score\n",
+ "0 tyler bates 1.337788\n",
+ "1 jan hegenberg 1.193714\n",
+ "2 five finger death punch 1.190098\n",
+ "3 slagsmålsklubben 1.178774\n",
+ "4 daniel licht 1.163203\n",
+ "5 kaizers orchestra 1.141305\n",
+ "6 eav 1.137476\n",
+ "7 fightstar 1.131872\n",
+ "8 flyleaf 1.128522\n",
+ "9 volbeat 1.127203\n",
+ "10 saliva 1.124229\n",
+ "11 freedom call 1.123379\n",
+ "12 jennifer rostock 1.118862\n",
+ "13 flobots 1.117408\n",
+ "14 skillet 1.115119\n",
+ "15 machinae supremacy 1.112002\n",
+ "16 eisbrecher 1.111799\n",
+ "17 aiden 1.107183\n",
+ "18 asp 1.102318\n",
+ "19 drowning pool 1.102212\n"
+ ]
+ }
+ ],
+ "source": [
+ "user_id = 1094\n",
+ "\n",
+ "# Use the implicit recommender.\n",
+ "recommended = model.recommend(user_id, sparse_user_item,N = 20,filter_already_liked_items = False)\n",
+ "\n",
+ "artists = []\n",
+ "scores = []\n",
+ "\n",
+ "# Get artist names from ids\n",
+ "for item in recommended:\n",
+ " idx, score = item\n",
+ " artists.append(data.artist.loc[data.artist_id == idx].iloc[0])\n",
+ " scores.append(score)\n",
+ "\n",
+ "# Create a dataframe of artist names and scores\n",
+ "recommendations = pd.DataFrame({'artist': artists, 'score': scores})\n",
+ "\n",
+ "print (recommendations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8bd6eee9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}