/*****************************************************
 Amino Acid Preference Toolkit in Java
 Pathogen Project
 Department of Computer Science and Engineering
 University of South Carolina
 Columbia, SC 29208
 Contact Email: rose@cse.sc.edu
*****************************************************/

import java.io.*;
import java.util.*;

public class ValidationFormatForSVM
{
	public static void main(String[] args) throws Exception
	{
		// args[0] = types file
		// args[1] = training data file
		// args[2] = output filename


		Vector types = new Vector();

		BufferedReader bufferedReader = new BufferedReader(new FileReader(args[0]));
		String line;
		StringTokenizer tokenizer;

		while ((line = bufferedReader.readLine()) != null)
		{
			types.add(line);
		}
		bufferedReader.close();

		Hashtable filenameToTypeTable = new Hashtable();
		bufferedReader = new BufferedReader(new FileReader(args[1]));
		String filename, type;
		LinkedList names = new LinkedList();
		while ((line = bufferedReader.readLine()) != null)
		{
			tokenizer = new StringTokenizer(line, ":");

			filename = tokenizer.nextToken();
			names.add(filename);
			type = tokenizer.nextToken();
			filenameToTypeTable.put(filename, type);
		}
		bufferedReader.close();


		// generate a training file for each type


		int numberOfTypes = types.size();

		String trainingLabel = "0";
		int featureCount = 1;

		double value;
		Hashtable printWriterTable = new Hashtable();

		for (int i = 0; i < numberOfTypes; i++)
		{
			String typeName = (String)types.elementAt(i);
			PrintWriter printWriter = new PrintWriter(new FileWriter(args[2] + "." + typeName));
			printWriterTable.put(typeName, printWriter);
		}
		int numberOfFiles = names.size();

		for (int z = 0; z < numberOfFiles; z++)
		{
			featureCount = 1;
			filename = (String)names.get(z);
			String genomeType = (String)filenameToTypeTable.get(filename);
			bufferedReader = new BufferedReader(new FileReader(filename));

			line = bufferedReader.readLine();
			tokenizer = new StringTokenizer(line);


			for (int i = 0; i < numberOfTypes; i++)
			{
				String typeName = (String)types.elementAt(i);
				PrintWriter printWriter = (PrintWriter)printWriterTable.get(typeName);
				if (genomeType.equals(typeName)) trainingLabel = "1";
				else trainingLabel = "-1";

				printWriter.print(trainingLabel);
			}

			while (tokenizer.hasMoreTokens())
			{
				value = new Double(tokenizer.nextToken()).doubleValue();
				if (value > 0)
				{
					for (int i = 0; i < numberOfTypes; i++)
					{
						String typeName = (String)types.elementAt(i);
						PrintWriter printWriter = (PrintWriter)printWriterTable.get(typeName);
						printWriter.print(" " + featureCount + ":" + value);	
					}
				}	
				featureCount++;
			}
			for (int i = 0; i < numberOfTypes; i++)
			{
				String typeName = (String)types.elementAt(i);
				PrintWriter printWriter = (PrintWriter)printWriterTable.get(typeName);
				printWriter.println();
			}
			bufferedReader.close();
		}

		for (int i = 0; i < numberOfTypes; i++)
		{
			String typeName = (String)types.elementAt(i);
			PrintWriter printWriter = (PrintWriter)printWriterTable.get(typeName);
			printWriter.flush();
			printWriter.close();
		}
	}
}
