1. Alexandre Patry
  2. mallet

Commits

David Mimno  committed 48877ef

New doc-topics format: tab-separated fields, document name in place of source, and alpha-smoothed topic proportions

  • Participants
  • Parent commits e39493b
  • Branches default

Comments (0)

Files changed (1)

File src/cc/mallet/topics/ParallelTopicModel.java

View file
  • Ignore whitespace
 	 *  @param max         Print no more than this many topics
 	 */
 	public void printDocumentTopics (PrintWriter out, double threshold, int max)	{
-		out.print ("#doc source topic proportion ...\n");
+		out.print ("#doc name topic proportion ...\n");
 		int docLen;
 		int[] topicCounts = new int[ numTopics ];
 
 			LabelSequence topicSequence = (LabelSequence) data.get(doc).topicSequence;
 			int[] currentDocTopics = topicSequence.getFeatures();
 
-			out.print (doc); out.print (' ');
+			StringBuilder builder = new StringBuilder();
 
-			if (data.get(doc).instance.getSource() != null) {
-				out.print (data.get(doc).instance.getSource()); 
+			builder.append(doc);
+			builder.append("\t");
+
+			if (data.get(doc).instance.getName() != null) {
+				builder.append(data.get(doc).instance.getName()); 
 			}
 			else {
-				out.print ("null-source");
+				builder.append("no-name");
 			}
 
-			out.print (' ');
+			builder.append("\t");
 			docLen = currentDocTopics.length;
 
 			// Count up the tokens
 
 			// And normalize
 			for (int topic = 0; topic < numTopics; topic++) {
-				sortedTopics[topic].set(topic, (float) topicCounts[topic] / docLen);
+				sortedTopics[topic].set(topic, (alpha[topic] + topicCounts[topic]) / (docLen + alphaSum) );
 			}
 			
 			Arrays.sort(sortedTopics);
 			for (int i = 0; i < max; i++) {
 				if (sortedTopics[i].getWeight() < threshold) { break; }
 				
-				out.print (sortedTopics[i].getID() + " " + 
-						  sortedTopics[i].getWeight() + " ");
+				builder.append(sortedTopics[i].getID() + "\t" + 
+							   sortedTopics[i].getWeight() + "\t");
 			}
-			out.print (" \n");
+			out.println(builder);
 
 			Arrays.fill(topicCounts, 0);
 		}