合肥seo优化_上海网站托管_app开发公司_宜昌网站建设

英文建站

小江seotaobao、淄博网站推广、永久使用、不限域名、廊坊网站优化

万网空间管理?

合肥seo优化_上海网站托管_app开发公司_宜昌网站建设

# NOTE(review): nothing in the class below reads `matrix2`; `recommend` uses a
# module-level list called `matrix` instead — confirm which name is intended.
matrix2 = []


class UserBasedCF(object):
    """TopN recommendation - User Based Collaborative Filtering.

    Ratings are read from comma-separated ``user,movie,rating`` lines and kept
    as nested dicts ``{user: {movie: rating}}``; the rating is stored exactly
    as read from the file (a string).

    The class relies on two module-level objects that are not defined in this
    block and must exist before the corresponding methods are called:

    * ``user_sim_mat`` - dict filled by :meth:`calc_user_sim` and read by
      :meth:`recommend`.
    * ``matrix`` - list that :meth:`recommend` clears, refills and returns.
    """

    def __init__(self):
        self.trainset = {}  # training set: user -> {movie: rating}
        self.testset = {}  # test set, same layout as the training set
        self.initialset = {}  # ratings loaded by initial_dataset()
        self.n_sim_user = 30  # number of most-similar users consulted (K)
        self.n_rec_movie = 10  # number of movies returned (N)
        self.movie_popular = {}  # movie -> number of training-set raters
        self.movie_count = 0  # total number of distinct movies

        print('Similar user number = %d' % self.n_sim_user, file=sys.stderr)
        print('recommended movie number = %d' % self.n_rec_movie,
              file=sys.stderr)

    @staticmethod
    def loadfile(filename):
        """Yield the lines of *filename* (UTF-8) without trailing CR/LF.

        The file is opened lazily on first iteration and is closed by the
        context manager even when the consumer stops early or an exception
        propagates. The success message is written to stderr only once the
        file has been read to the end.
        """
        with open(filename, 'r', encoding='UTF-8') as fp:
            for line in fp:
                yield line.strip('\r\n')
        print('load %s success' % filename, file=sys.stderr)

    def initial_dataset(self, filename1):
        """Load ``user,movie,rating`` rows from *filename1* into ``initialset``.

        Blank lines are skipped. A non-blank line that does not contain
        exactly three comma-separated fields raises ``ValueError``.
        """
        for line in self.loadfile(filename1):
            if not line.strip():
                continue  # tolerate empty / trailing lines
            user, movie, rating = line.split(',')
            self.initialset.setdefault(user, {})[movie] = rating

    def generate_dataset(self, filename2, pivot=1.0):
        """Load rating data and split it into training and test sets.

        Each row goes to the training set when ``random.random() < pivot`` and
        to the test set otherwise, so ``pivot=0.7`` gives roughly a 7:3 split
        and the default ``1.0`` puts every row in the training set.

        Blank lines are skipped. A non-blank line that does not contain
        exactly three comma-separated fields raises ``ValueError``.
        """
        trainset_len = 0
        testset_len = 0

        for line in self.loadfile(filename2):
            if not line.strip():
                continue  # tolerate empty / trailing lines
            user, movie, rating = line.split(',')
            if random.random() < pivot:
                self.trainset.setdefault(user, {})[movie] = rating
                trainset_len += 1
            else:
                self.testset.setdefault(user, {})[movie] = rating
                testset_len += 1

        print('split training set and test set succ', file=sys.stderr)
        print('train set = %s' % trainset_len, file=sys.stderr)
        print('test set = %s' % testset_len, file=sys.stderr)

    def calc_user_sim(self):
        """Rebuild the user-user similarity matrix from the training set.

        Results are written into the module-level ``user_sim_mat`` as
        ``{u: {v: similarity}}``. As a by-product ``movie_popular`` and
        ``movie_count`` are recomputed. Both containers are cleared first so
        that calling this method again recomputes the values instead of
        stacking new weights on top of already-normalised ones.
        """
        # Inverted index: movie -> set of users who rated it.
        movie2users = {}
        self.movie_popular.clear()
        for user, movies in self.trainset.items():
            for movie in movies:
                movie2users.setdefault(movie, set()).add(user)
                # Count item popularity at the same time.
                self.movie_popular[movie] = self.movie_popular.get(movie, 0) + 1

        # Saved for later use (the original comment says: evaluation).
        self.movie_count = len(movie2users)
        print('total movie number = %d' % self.movie_count, file=sys.stderr)

        usersim_mat = user_sim_mat
        usersim_mat.clear()

        # Every movie shared by u and v adds 1 / log(1 + raters), so a movie
        # rated by many users contributes less than a rarely rated one.
        for users in movie2users.values():
            weight = 1 / math.log(1 + len(users))
            for u in users:
                for v in users:
                    if u == v:
                        continue
                    row = usersim_mat.setdefault(u, {})
                    row[v] = row.get(v, 0) + weight

        # Normalise by the geometric mean of the two users' rating counts.
        for u, related_users in usersim_mat.items():
            rated_by_u = len(self.trainset[u])
            for v, count in related_users.items():
                related_users[v] = count / math.sqrt(
                    rated_by_u * len(self.trainset[v]))

    def recommend(self, user):
        """Find K similar users and recommend N movies.

        The score of a candidate movie is the sum of the similarity factors
        of the (at most ``n_sim_user``) most similar users who rated it;
        movies already present in the user's training-set ratings are
        excluded.

        Returns the module-level list ``matrix``, which is cleared and
        refilled with up to ``n_rec_movie`` movie ids in descending score
        order (and also printed to stdout). A user that is unknown or has no
        similar users yields an empty list instead of raising ``KeyError``.
        """
        matrix.clear()  # the shared result list is reused on every call
        K = self.n_sim_user
        N = self.n_rec_movie
        rank = {}  # movie id -> accumulated interest score
        # Only training-set ratings count as "already watched".
        watched_movies = self.trainset.get(user, {})

        neighbours = sorted(user_sim_mat.get(user, {}).items(),
                            key=itemgetter(1), reverse=True)[:K]
        for similar_user, similarity_factor in neighbours:
            for imdbid in self.trainset.get(similar_user, ()):
                if imdbid in watched_movies:
                    continue
                rank[imdbid] = rank.get(imdbid, 0) + similarity_factor

        # Keep the N best-scoring movies.
        best = sorted(rank.items(), key=itemgetter(1), reverse=True)[:N]
        matrix.extend(imdbid for imdbid, _ in best)
        print(matrix)
        return matrix

猜你喜欢