佛山企业网站建设特色/网上销售培训课程
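将预测评分矩阵分解为用户特征矩阵和项目特征矩阵,预测评分计算式为:

$$\hat{r}_{ui} = \mu + b_u + b_i + p_u^{\top} q_i$$

其中 $\mu$ 为全局平均分,$b_u$、$b_i$ 分别为用户偏置和项目偏置,$p_u$、$q_i$ 分别为用户 $u$ 和项目 $i$ 的特征向量。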
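目标函数为:

$$\min_{b,\,p,\,q}\ \frac{1}{2}\sum_{(u,i)\in K}\left[\left(r_{ui}-\hat{r}_{ui}\right)^2+\lambda\left(b_u^2+b_i^2+\lVert p_u\rVert^2+\lVert q_i\rVert^2\right)\right]$$

其中 $K$ 为训练集中已有评分的 (用户, 项目) 集合,$r_{ui}$ 为真实评分,$\hat{r}_{ui}$ 为预测评分,$\lambda$ 为正则化参数。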
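根据梯度下降计算,记预测误差 $e_{ui} = r_{ui} - \hat{r}_{ui}$,学习率为 $\gamma$,正则化参数为 $\lambda$,参数更新式为:

$$
\begin{aligned}
b_u &\leftarrow b_u + \gamma\,(e_{ui} - \lambda\, b_u) \\
b_i &\leftarrow b_i + \gamma\,(e_{ui} - \lambda\, b_i) \\
p_u &\leftarrow p_u + \gamma\,(e_{ui}\, q_i - \lambda\, p_u) \\
q_i &\leftarrow q_i + \gamma\,(e_{ui}\, p_u - \lambda\, q_i)
\end{aligned}
$$

注意 $p_u$ 与 $q_i$ 的更新都使用更新前的旧值。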
该算法的Python代码为:
import math
import random
import matplotlib.pyplot as plt# 求平均值
def Average(fileName):
    """Return the mean rating stored in a whitespace-separated ratings file.

    Every non-blank line is split on whitespace and its third field is parsed
    as an integer rating; the remaining fields are ignored by this function.

    Args:
        fileName: Path of the ratings file to read.

    Returns:
        The arithmetic mean of all ratings, as a float.

    Raises:
        ZeroDivisionError: If the file contains no rating lines.
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field of a line is not an integer.
    """
    total = 0.0
    cnt = 0
    # The context manager closes the file even when a line fails to parse.
    with open(fileName, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            cnt += 1
            total += int(arr[2])
    return total / cnt


# Compute the dot product of two vectors
def InerProduct(v1, v2):
    """Return the dot product of two equally indexed numeric sequences.

    Iteration is driven by ``v1``: element ``i`` of ``v1`` is multiplied by
    element ``i`` of ``v2`` and the products are accumulated.

    Args:
        v1: First vector; its length determines how many terms are summed.
        v2: Second vector; must be at least as long as ``v1``.

    Returns:
        The accumulated sum of products (``0`` when ``v1`` is empty).

    Raises:
        IndexError: If ``v2`` is shorter than ``v1``.
    """
    result = 0
    for i, a in enumerate(v1):
        result += a * v2[i]
    return result


# Predicted-score formula.
# Parameters:
#   av: average rating
#   bu: deviation of the user's ratings from the user average (user bias)
#   bi: deviation of the item's ratings from the item average (item bias)
#   pu: user feature matrix
#   qi: item feature matrix
def PredictScore(av, bu, bi, pu, qi):
    """Return the predicted rating, clamped to the range [1, 5].

    The raw prediction is ``av + bu + bi + InerProduct(pu, qi)``.

    Args:
        av: Average rating.
        bu: Bias term of the user.
        bi: Bias term of the item.
        pu: Feature vector of the user.
        qi: Feature vector of the item.

    Returns:
        The raw prediction, or ``1`` / ``5`` when it falls below / above
        those bounds.
    """
    pScore = av + bu + bi + InerProduct(pu, qi)
    if pScore < 1:
        pScore = 1
    elif pScore > 5:
        pScore = 5
    return pScore


def SVD(configureFile, testDataFile, trainDataFile, modelSaveFile):
    """Train a biased matrix-factorization model by SGD and plot the test RMSE.

    The first line of ``configureFile`` must hold six whitespace-separated
    values: average score, user count, item count, factor dimension, learning
    rate and regularization parameter. Rating files hold one
    ``user item score`` triple per line; user and item IDs have 1 subtracted
    to obtain list indices, and scores are parsed as integers.

    Training runs for at most 1000 passes over ``trainDataFile``. After each
    pass the learning rate is multiplied by 0.9 and the RMSE on
    ``testDataFile`` is computed with ``Validate``; training stops as soon as
    that RMSE fails to decrease. The RMSE curve is then printed and shown with
    the module-level ``plt``.

    Args:
        configureFile: Path of the configuration file.
        testDataFile: Path of the ratings file used for validation.
        trainDataFile: Path of the ratings file used for training.
        modelSaveFile: Accepted for interface compatibility; this function
            does not use it.

    Returns:
        A tuple ``(s, rmse)``: the indices of the passes that improved the
        test RMSE and the RMSE reached in each of them.
    """
    # Read the average score, user count, item count, factor dimension,
    # learning rate and regularization parameter from the configure file.
    with open(configureFile, 'r') as fi:
        arr = fi.readline().split()
    averageScore = float(arr[0])
    userNum = int(arr[1])
    itemNum = int(arr[2])
    factorNum = int(arr[3])
    learnRate = float(arr[4])
    regularization = float(arr[5])

    # Initialize the model: zero biases, small random factors scaled by
    # 1 / sqrt(factorNum). qi is drawn before pu to keep the random sequence
    # identical for a given seed.
    bi = [0.0 for i in range(itemNum)]
    bu = [0.0 for i in range(userNum)]
    scale = math.sqrt(factorNum)
    qi = [[(0.1 * random.random() / scale) for j in range(factorNum)]
          for i in range(itemNum)]
    pu = [[(0.1 * random.random() / scale) for j in range(factorNum)]
          for i in range(userNum)]
    print("initialization end\nstart training\n")

    # Train the model
    s = []
    rmse = []
    preRmse = 1000000.0
    iteration = 1000
    for step in range(iteration):
        # The context manager closes the training file after every pass,
        # including when a malformed line raises.
        with open(trainDataFile, 'r') as fi:
            for line in fi:
                arr = line.split()
                if not arr:
                    # Skip blank lines (e.g. a trailing newline).
                    continue
                uid = int(arr[0]) - 1
                iid = int(arr[1]) - 1
                score = int(arr[2])
                prediction = PredictScore(averageScore, bu[uid], bi[iid],
                                          pu[uid], qi[iid])
                eui = score - prediction

                # Update the parameters
                bu[uid] += learnRate * (eui - regularization * bu[uid])
                bi[iid] += learnRate * (eui - regularization * bi[iid])
                for k in range(factorNum):
                    # Save pu before updating it: the qi update must use the
                    # old value.
                    oldPu = pu[uid][k]
                    pu[uid][k] += learnRate * (eui * qi[iid][k] - regularization * pu[uid][k])
                    qi[iid][k] += learnRate * (eui * oldPu - regularization * qi[iid][k])

        learnRate *= 0.9
        curRmse = Validate(testDataFile, averageScore, bu, bi, pu, qi)
        print("test_RMSE in step %d: %f" % (step, curRmse))
        if curRmse >= preRmse:
            # Stop once the test RMSE no longer improves.
            break
        preRmse = curRmse
        s.append(step)
        rmse.append(curRmse)

    print(s)
    print(rmse)
    plt.plot(s, rmse)
    plt.show()
    return s, rmse


# Validate the model
def Validate(testDataFile, av, bu, bi, pu, qi):
    """Return the root-mean-square error of the model on a ratings file.

    Every non-blank line of ``testDataFile`` is parsed as a
    ``user item score`` triple. User and item IDs have 1 subtracted to obtain
    list indices, the score is parsed as an integer, and the prediction comes
    from ``PredictScore``.

    Args:
        testDataFile: Path of the ratings file to evaluate on.
        av: Average rating passed through to ``PredictScore``.
        bu: List of user bias terms, indexed by user.
        bi: List of item bias terms, indexed by item.
        pu: List of user feature vectors, indexed by user.
        qi: List of item feature vectors, indexed by item.

    Returns:
        ``sqrt(sum((true - predicted) ** 2) / count)`` over all rating lines.

    Raises:
        ZeroDivisionError: If the file contains no rating lines.
    """
    cnt = 0
    squaredError = 0.0
    # The context manager closes the file even when a line fails to parse.
    with open(testDataFile, 'r') as fi:
        for line in fi:
            arr = line.split()
            if not arr:
                # Skip blank lines (e.g. a trailing newline).
                continue
            cnt += 1
            uid = int(arr[0]) - 1
            iid = int(arr[1]) - 1
            pScore = PredictScore(av, bu[uid], bi[iid], pu[uid], qi[iid])
            tScore = int(arr[2])
            diff = tScore - pScore
            squaredError += diff * diff
    return math.sqrt(squaredError / cnt)


if __name__ == '__main__':
    import os

    configureFile = 'svd.conf'
    # os.path.join yields 'ml_data\\training.txt' on Windows and a valid
    # 'ml_data/training.txt' elsewhere, instead of a Windows-only literal.
    trainDataFile = os.path.join('ml_data', 'training.txt')
    testDataFile = os.path.join('ml_data', 'test.txt')
    modelSaveFile = 'svd_model.pkl'
    resultSaveFile = 'prediction'
    SVD(configureFile, testDataFile, trainDataFile, modelSaveFile)