下面是一个基本的 Java 程序：RecommenderIntro.java
package xyz.pl8.recommenderintro;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import java.io.File;
import java.util.List;

/**
 * Minimal user-based collaborative-filtering example built on the Mahout Taste API.
 *
 * <p>Loads a ratings file, builds a Pearson-correlation user similarity, restricts each
 * user to a 3-nearest-neighbor neighborhood, and prints the recommendations produced for
 * one hard-coded user.
 */
public class RecommenderIntro {

    /**
     * Runs the example end to end and prints each recommended item to standard output.
     *
     * <p>Any failure (missing data file, Mahout error) is reported by printing the stack
     * trace; the method itself never throws.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // intro.csv format: userId,itemId,rating
            DataModel model = new FileDataModel(new File("/home/hadoop/intro.csv"));
            System.out.println(model);

            // User-to-user similarity.
            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

            // K-nearest-neighbor users (K = 3).
            UserNeighborhood neighborhood = new NearestNUserNeighborhood(3, similarity, model);

            // User-based recommender.
            Recommender recommender =
                    new GenericUserBasedRecommender(model, neighborhood, similarity);

            // Recommend items: per the Recommender API the arguments are (userID, howMany),
            // i.e. at most 2 items for user 2.
            List<RecommendedItem> recommendedItems = recommender.recommend(2, 2);
            for (RecommendedItem item : recommendedItems) {
                System.out.println(item);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
下面是基于物品的多线程批量相似度计算（BatchItemSimilaritiesIntro.java），以及它所使用的数据模型 MovieLensDataModel.java
package xyz.pl8.userrecommendermovielens;

import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.precompute.FileSimilarItemsWriter;
import org.apache.mahout.cf.taste.impl.similarity.precompute.MultithreadedBatchItemSimilarities;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;

import java.io.File;
import java.io.IOException;

/**
 * Precomputes item-to-item similarities for a MovieLens ratings file using several
 * threads and writes them to {@code similarity.csv} in the JVM temp directory.
 */
public class BatchItemSimilaritiesIntro {

    /**
     * Entry point.
     *
     * @param args exactly one element: the path of the MovieLens ratings file; with any
     *     other argument count an error is printed and the JVM exits with status -1
     * @throws IOException if the ratings file cannot be converted/read or the result file
     *     cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.err.println("Need dataset file as argument!");
            System.exit(-1);
        }

        File resultFile = new File(System.getProperty("java.io.tmpdir"), "similarity.csv");

        DataModel dataModel = new MovieLensDataModel(new File(args[0]));
        ItemSimilarity similarity = new LogLikelihoodSimilarity(dataModel);
        ItemBasedRecommender recommender = new GenericItemBasedRecommender(dataModel, similarity);

        // Second constructor argument: number of similar items to keep per item.
        BatchItemSimilarities batchItemSimilarities =
                new MultithreadedBatchItemSimilarities(recommender, 5);
        SimilarItemsWriter writer = new FileSimilarItemsWriter(resultFile);

        // Arguments: degree of parallelism (one worker per available core),
        // maximum duration in hours (1), and the sink for the results.
        int numSimilarities = batchItemSimilarities.computeItemSimilarities(
                Runtime.getRuntime().availableProcessors(), 1, writer);

        // The previous message read "Computed N for  items and saved them to ..." —
        // it lacked the noun and an item count; report what was actually computed.
        System.out.println("Computed " + numSimilarities + " similarities and saved them to "
                + resultFile.getAbsolutePath());
    }
}
package xyz.pl8.userrecommendermovielens;

import org.apache.commons.io.Charsets;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.common.iterator.FileLineIterable;
// NOTE(review): unused in this class, and the org.omg.CORBA packages were removed from
// the JDK in Java 11 — this import breaks compilation there. Safe to drop once confirmed.
import org.omg.CORBA.PUBLIC_MEMBER;

import java.io.*;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

/**
 * A {@link FileDataModel} that accepts a {@code ::}-delimited ratings file by first
 * rewriting it as a comma-separated file in the JVM temp directory.
 */
public class MovieLensDataModel extends FileDataModel {

    /** Field separator used by the input file. */
    private static final String COLON_DELIMITER = "::";

    /** Precompiled pattern matching {@link #COLON_DELIMITER}. */
    private static final Pattern COLON_DELIMITER_PATTERN = Pattern.compile(COLON_DELIMITER);

    /**
     * Creates a data model backed by the converted copy of {@code ratingsFile}.
     *
     * @param ratingsFile the {@code ::}-delimited ratings file
     * @throws IOException if the file cannot be converted or loaded
     */
    public MovieLensDataModel(File ratingsFile) throws IOException {
        super(convertFile(ratingsFile));
    }

    /**
     * Converts a {@code ::}-delimited file into {@code ratings.csv} in the temp directory.
     *
     * <p>For every input line the last {@code ::}-separated field is dropped (presumably
     * the timestamp of a MovieLens rating — confirm against the data set) and the
     * remaining {@code ::} separators are replaced with commas. Lines that yield no comma
     * past the first character after conversion are skipped.
     *
     * <p>On any failure the partially written result file is deleted and the exception is
     * propagated, so a caller never receives a path to a missing or truncated file.
     *
     * @param originalFile the {@code ::}-delimited source file
     * @return the converted comma-separated file
     * @throws IOException if a line contains no {@code ::} delimiter, or if reading the
     *     source or writing the result fails
     */
    public static File convertFile(File originalFile) throws IOException {
        File resultFile = new File(System.getProperty("java.io.tmpdir"), "ratings.csv");
        if (resultFile.exists()) {
            resultFile.delete();
        }

        // try-with-resources closes the writer on every path, including mid-loop failures.
        try (Writer writer =
                new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8)) {
            for (String line : new FileLineIterable(originalFile, false)) {
                int lastDelimiter = line.lastIndexOf(COLON_DELIMITER);
                if (lastDelimiter < 0) {
                    throw new IOException("Invalid data!");
                }

                // Drop the trailing field, then turn the remaining "::" into ",".
                String withoutLastField = line.substring(0, lastDelimiter);
                String convertedLine =
                        COLON_DELIMITER_PATTERN.matcher(withoutLastField).replaceAll(",");

                // Skip lines that have no comma beyond position 0 after conversion.
                if (convertedLine.lastIndexOf(",") <= 0) {
                    continue;
                }

                writer.write(convertedLine);
                writer.write('\n');
            }
        } catch (IOException | RuntimeException e) {
            // The writer is already closed here; remove the partial output and report
            // the failure instead of silently returning a non-existent file.
            resultFile.delete();
            throw e;
        }
        return resultFile;
    }
}