利用Python简单实现网易云用户算法推荐系统( 三 )


1.针对用户推荐 网易云音乐(每日30首歌/7首歌) 2.针对歌曲 在你听某首歌的时候 , 找“相似歌曲”
7. Surprise推荐库简单介绍
在推荐系统的建模过程中 , 我们将用到Python库 Surprise(Simple Python RecommendatIon System Engine) , 它是scikit系列中的一个(很多同学用过scikit-learn和scikit-image等库) 。
具体的配合这篇博文(Python推荐系统库——Surprise)深入学习Surprise 。
8. 网易云音乐歌单推荐
利用surprise推荐库中的KNN协同过滤算法对已有数据进行建模 , 并据此预测、推荐相似的歌单 。
# -*- coding:utf-8-*-"""利用surprise推荐库 KNN协同过滤算法推荐网易云歌单python2.7环境"""from __future__ import (absolute_import, division, print_function, unicode_literals)import osimport csvfrom surprise import KNNBaseline, Reader, KNNBasic, KNNWithMeans,evaluatefrom surprise import Datasetdef recommend_model():file_path = os.path.expanduser('neteasy_playlist_recommend_data.csv')# 指定文件格式reader = Reader(line_format='user item rating timestamp', sep=',')# 从文件读取数据music_data = http://kandian.youth.cn/index/Dataset.load_from_file(file_path, reader=reader)# 计算歌曲和歌曲之间的相似度train_set = music_data.build_full_trainset()print('开始使用协同过滤算法训练推荐模型...')algo = KNNBasic()algo.fit(train_set)return algodef playlist_data_preprocessing():csv_reader = csv.reader(open('neteasy_playlist_id_to_name_data.csv'))id_name_dic = {}name_id_dic = {}for row in csv_reader:id_name_dic[row[0]] = row[1]name_id_dic[row[1]] = row[0]return id_name_dic, name_id_dicdef song_data_preprocessing():csv_reader = csv.reader(open('neteasy_song_id_to_name_data.csv'))id_name_dic = {}name_id_dic = {}for row in csv_reader:id_name_dic[row[0]] = row[1]name_id_dic[row[1]] = row[0]return id_name_dic, name_id_dicdef playlist_recommend_main():print("加载歌单id到歌单名的字典映射...")print("加载歌单名到歌单id的字典映射...")id_name_dic, name_id_dic = playlist_data_preprocessing()print("字典映射成功...")print('构建数据集...')algo = recommend_model()print('模型训练结束...')current_playlist_id = id_name_dic.keys()[200]print('当前的歌单id:' + current_playlist_id)current_playlist_name = id_name_dic[current_playlist_id]print('当前的歌单名字:' + current_playlist_name)playlist_inner_id = algo.trainset.to_inner_uid(current_playlist_id)print('当前的歌单内部id:' + str(playlist_inner_id))playlist_neighbors = algo.get_neighbors(playlist_inner_id, k=10)playlist_neighbors_id = (algo.trainset.to_raw_uid(inner_id) for inner_id in playlist_neighbors)# 把歌曲id转成歌曲名字playlist_neighbors_name = (id_name_dic[playlist_id] for playlist_id in playlist_neighbors_id)print("和歌单<", current_playlist_name, '> 最接近的10个歌单为:\n')for playlist_name 
in playlist_neighbors_name:print(playlist_name, name_id_dic[playlist_name])playlist_recommend_main()# "E:\ProgramingSoftware\PyCharm Community Edition 2016.2.3\Anaconda2\python2.exe" C:/Users/Administrator/Desktop/博客素材/recommend_system_learning/recommend_main.py# 加载歌单id到歌单名的字典映射...# 加载歌单名到歌单id的字典映射...# 字典映射成功...# 构建数据集...# 开始使用协同过滤算法训练推荐模型...# Computing the msd similarity matrix...# Done computing similarity matrix.# 模型训练结束...# 当前的歌单id:2056644233# 当前的歌单名字:暖阳微醺◎来碗甜度100%的糖水吧# 当前的歌单内部id:444# 和歌单


推荐阅读