Skip to content

Commit

Permalink
Add py files
Browse files Browse the repository at this point in the history
  • Loading branch information
susanli2016 authored Dec 31, 2021
1 parent ef24180 commit 6cce29f
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 0 deletions.
60 changes: 60 additions & 0 deletions bpr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from util import m_normal, learning_rate, get_lambda
from classes import ret
import random as random
import numpy as np
import math
def bpr_update(users, movies):
    """Run one stochastic BPR pass: at most one gradient step per user.

    For every user that has at least one training movie, one positive movie
    is drawn from ``u.movies_train`` and one candidate negative from all of
    ``movies``. If the candidate is itself one of the user's training movies
    the user is skipped for this pass; otherwise the user factor and both
    movie factors take one gradient-ascent step on the regularised BPR
    objective ``ln sigmoid(x_uij) - lam * ||theta||^2``.

    Args:
        users: dict mapping a user key to an object exposing ``factor`` (a
            numpy vector) and ``movies_train`` (a dict keyed by movie id).
        movies: dict mapping movie id to an object exposing ``factor``.

    Returns:
        None. The ``factor`` attributes are replaced in place.
    """
    lr = learning_rate()
    lam = get_lambda()

    # random.sample() no longer accepts dict views (TypeError on Python 3.11+),
    # so materialise the candidate ids once, outside the loop.
    movie_ids = list(movies)
    if not movie_ids:
        return

    for u in users.values():
        if not u.movies_train:
            continue

        rand_pos = random.choice(list(u.movies_train))
        rand_neg = random.choice(movie_ids)
        if rand_neg in u.movies_train:
            # The sampled "negative" is actually a positive: no update.
            continue

        pos_movie = movies[rand_pos]
        neg_movie = movies[rand_neg]
        Vu, Vi, Vj = u.factor, pos_movie.factor, neg_movie.factor

        # e^{-x} / (1 + e^{-x}) with x = Vu.Vi - Vu.Vj
        firstterm = calculate_first_term(Vu, Vi, Vj)

        # All three gradients are evaluated at the pre-update point.
        # The regulariser contributes -lam * theta: it shrinks each vector
        # towards zero rather than adding its scalar norm to every component.
        u.factor = Vu + lr * (firstterm * (Vi - Vj) - lam * Vu)
        pos_movie.factor = Vi + lr * (firstterm * Vu - lam * Vi)
        neg_movie.factor = Vj + lr * (-firstterm * Vu - lam * Vj)

def calculate_first_term(Vu, Vi, Vj):
    """Return e^{-x} / (1 + e^{-x}) for x = Vu.Vi - Vu.Vj.

    The exponent is capped at 500 so that math.exp cannot overflow.
    """
    exponent = -(np.dot(Vu, Vi) - np.dot(Vu, Vj))
    # min() keeps the exponent when it is <= 500 and substitutes 500 otherwise.
    exponent = min(exponent, 500)
    numerator = math.exp(exponent)
    return numerator / (1 + math.exp(exponent))

36 changes: 36 additions & 0 deletions classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from util import random_vector
class user:
    """A user record: id, rating dictionaries and a latent factor vector."""

    def __init__(self, userid):
        self.userid = userid
        # Three independent, initially empty dicts. read_ratings() fills them
        # as movie id -> rating.
        self.movies_train = {}
        self.movies_test = {}
        self.movies_all = {}
        # Latent vector drawn from random_vector().
        self.factor = random_vector()

class movie:
    """A movie record: id, optional metadata and a latent factor vector."""

    def __init__(self, movieid, rating=0, title=None, genres=None):
        self.movieid = movieid
        # Optional metadata, stored exactly as passed in.
        self.rating, self.title, self.genres = rating, title, genres
        # Latent vector drawn from random_vector().
        self.factor = random_vector()

class ret:
    """Plain result holder: ids default to None, isuser to True, retvalue to []."""

    def __init__(self):
        self.userid = self.movieid = None
        self.isuser = True
        # A fresh list per instance, never shared between instances.
        self.retvalue = list()

class usermovie:
    """Plain (user, movie, rating) holder; ids start as None and rating as 0."""

    def __init__(self):
        self.userid = self.movieid = None
        self.rating = 0

82 changes: 82 additions & 0 deletions filereader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from classes import user
from classes import movie
from numpy import random
from util import min_rating, random_vector, num_users
from random import seed
import pandas as pd
import numpy as np
def read_ratings(filename):
    """Load ratings from a header-less CSV and build per-user train/test sets.

    Rows are read as (user_id, movie_id, rating, timestamp). Only rows whose
    user_id is below num_users() are kept. Each kept row with a rating of at
    least min_rating() is assigned to the user's ``movies_train`` with
    probability 0.7 and to ``movies_test`` otherwise. A second pass records
    every kept row (whatever its rating) in ``movies_all`` for users that
    already exist, so users without any qualifying rating are absent from
    the result.

    Returns:
        dict mapping int user id -> ``user`` object.
    """
    # Fix both the stdlib and the numpy generators so the split is repeatable.
    seed(42)
    np.random.seed(42)

    columns = ['user_id', 'movie_id', 'rating', 'timestamp']
    ratings = pd.read_csv(filename, sep=',', names=columns, encoding='latin-1')
    for column, kind in (('user_id', int), ('movie_id', int), ('rating', float)):
        ratings[column] = ratings[column].astype(kind)

    ratings = ratings[ratings['user_id'] < num_users()]
    threshold = min_rating()

    def rows():
        # Yield (user id, movie id, rating) for every kept row, in file order.
        for uid, mid, score in zip(ratings['user_id'], ratings['movie_id'], ratings['rating']):
            yield int(uid), int(mid), float(score)

    users = {}
    for userid, movieid, rating1 in rows():
        if not rating1 >= threshold:
            continue
        # The train/test draw happens before any user creation, as before.
        in_train = random.random() < 0.7
        profile = users.get(userid)
        if profile is None:
            profile = user(userid)
            # Kept although user() already draws a factor: dropping this call
            # would shift the random sequence and therefore the split.
            profile.factor = random_vector()
            users[userid] = profile
        bucket = profile.movies_train if in_train else profile.movies_test
        bucket[movieid] = rating1

    for userid, movieid, rating1 in rows():
        profile = users.get(userid)
        if profile is not None:
            profile.movies_all[movieid] = rating1

    return users

def read_movies(filename):
    """Load movies from a header-less CSV of (movie_id, title, genres).

    Returns:
        dict mapping movie id (as parsed by pandas) -> ``movie`` object with
        rating 0 and a freshly drawn factor. Title and genres are not stored.
    """
    columns = ['movie_id', 'title', 'genres']
    frame = pd.read_csv(filename, sep=",", encoding='latin-1', names=columns)
    catalog = {}
    for movieid in frame['movie_id']:
        entry = movie(movieid, 0)
        # Second draw on top of the one in movie(); kept so the random
        # sequence matches the original.
        entry.factor = random_vector()
        catalog[movieid] = entry
    return catalog

124 changes: 124 additions & 0 deletions hitrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from classes import usermovie
import numpy as np
from sklearn.metrics import mean_squared_error
from math import sqrt

def _rescale_to_five(values):
    """Min-max scale *values* onto the range [0, 5].

    Returns an empty list for empty input, and all zeros when every value is
    identical (there is no spread to scale by).
    """
    if not values:
        return []
    low = min(values)
    spread = max(values) - low
    if spread == 0:
        return [0.0] * len(values)
    return [(value - low) / spread * 5 for value in values]


def _rmse(actual, predicted):
    """Return the root-mean-squared error, or NaN when there is nothing to compare."""
    if not actual:
        return float("nan")
    errors = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    return sqrt(float(np.mean(errors ** 2)))


def hit_rate(users, movies):
    """Evaluate the factor model: top-10 hit count and two RMSE figures.

    Every (user, movie) pair is scored with the dot product of the two factor
    vectors. A user counts as a hit when at least one of their 10
    highest-scored movies is in ``movies_test``. Scores of pairs that have a
    known rating are min-max scaled onto [0, 5] before being compared with
    the ratings.

    NOTE(review): movies from ``movies_train`` are not excluded from the
    ranking, matching the original behaviour.

    Args:
        users: dict of objects exposing ``factor``, ``movies_test`` and
            ``movies_all`` (dicts movie id -> rating).
        movies: dict of objects exposing ``movieid`` and ``factor``.

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``: number of users with a hit,
        number of users evaluated, RMSE over test ratings and RMSE over all
        ratings. An RMSE is NaN when no rated pair was found.
    """
    top_n = 10
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for u in users.values():
        denom += 1
        scored = []
        for m in movies.values():
            score = float(np.dot(u.factor, m.factor))
            if m.movieid in u.movies_all:
                actualall.append(u.movies_all[m.movieid])
                predictedall.append(score)
            if m.movieid in u.movies_test:
                actual.append(u.movies_test[m.movieid])
                predicted.append(score)
            scored.append((score, m.movieid))

        # Stable sort on the score only, so ties keep the iteration order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(movieid in u.movies_test for _, movieid in scored[:top_n]):
            hits += 1

    # Shift by the minimum (subtract, not add) and divide by the spread once.
    rms = _rmse(actual, _rescale_to_five(predicted))
    rmsall = _rmse(actualall, _rescale_to_five(predictedall))

    return hits, denom, rms, rmsall

def _rating_for(ratings, movieid):
    """Look up *movieid* in *ratings* under its raw, int or str form.

    Returns the stored rating, or None when no form of the id is a key.
    """
    for key in (movieid, int(movieid), str(movieid)):
        if key in ratings:
            return ratings[key]
    return None


def _rmse_or_nan(actual, predicted):
    """Return the root-mean-squared error, or NaN when both lists are empty."""
    if not actual:
        return float("nan")
    errors = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    return sqrt(float(np.mean(errors ** 2)))


def hit_rate_SVD(users, movies, svd):
    """Evaluate an external predictor: top-10 hit count and two RMSE figures.

    Every (user, movie) pair is scored with
    ``svd.predict(int(userid), int(movieid))[3]``. A user counts as a hit
    when at least one of their 10 highest-scored movies is in
    ``movies_test``. Unlike ``hit_rate``, predictions are compared with the
    ratings without rescaling.

    Movie ids are matched against the rating dicts under their raw, int and
    str forms, and the rating is read with the key that matched. The
    original tested ``str``/``int`` membership but then indexed with the raw
    id, which raised KeyError whenever the two types differed.

    Args:
        users: dict of objects exposing ``userid``, ``movies_test`` and
            ``movies_all`` (dicts movie id -> rating).
        movies: dict of objects exposing ``movieid``.
        svd: object with ``predict(uid, iid)`` whose result holds the
            estimated rating at index 3 (presumably a Surprise ``Prediction``
            - confirm against the caller).

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``; an RMSE is NaN when no rated
        pair was found.
    """
    top_n = 10
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for u in users.values():
        denom += 1
        uid = int(u.userid)
        scored = []
        for m in movies.values():
            estimate = float(svd.predict(uid, int(m.movieid))[3])

            known = _rating_for(u.movies_all, m.movieid)
            if known is not None:
                actualall.append(known)
                predictedall.append(estimate)

            held_out = _rating_for(u.movies_test, m.movieid)
            if held_out is not None:
                actual.append(held_out)
                predicted.append(estimate)

            scored.append((estimate, m.movieid))

        # Stable sort on the estimate only, so ties keep the iteration order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(_rating_for(u.movies_test, movieid) is not None
               for _, movieid in scored[:top_n]):
            hits += 1

    rms = _rmse_or_nan(actual, predicted)
    rmsall = _rmse_or_nan(actualall, predictedall)

    return hits, denom, rms, rmsall

40 changes: 40 additions & 0 deletions util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from numpy import random
import numpy as np

def num_users():
    """Return the exclusive upper bound on user ids that read_ratings keeps."""
    user_id_limit = 1_000_000
    return user_id_limit

def dimension():
    """Return the length of every latent factor vector."""
    latent_size = 50
    return latent_size

def min_rating():
    """Return the lowest rating that read_ratings puts into train/test sets."""
    threshold = 4
    return threshold

def learning_rate():
    """Return the step size applied to each gradient in bpr_update."""
    step = 1
    return step

def get_lambda():
    """Return the regularisation weight used by bpr_update."""
    weight = 0.1
    return weight

def random_vector():
    """Draw one zero-mean Gaussian vector of length dimension().

    The covariance comes from cov_matrix().
    """
    origin = np.zeros(dimension())
    return random.multivariate_normal(mean=origin, cov=cov_matrix())

def cov_matrix():
    """Build a dimension() x dimension() float matrix: 0.1 on the diagonal, 0 elsewhere."""
    size = dimension()
    cov = np.zeros((size, size), dtype=float)
    np.fill_diagonal(cov, 0.1)
    return cov

def m_normal(mean):
    """Draw one Gaussian sample centred on *mean* with covariance cov_matrix()."""
    return random.multivariate_normal(mean, cov_matrix())

0 comments on commit 6cce29f

Please sign in to comment.