/*****************************************************
 Amino Acid Preference Toolkit in Java
 Pathogen Project
 Department of Computer Science and Engineering
 University of South Carolina
 Columbia, SC 29208
 Contact Email: rose@cse.sc.edu
*****************************************************/

import java.io.*;
import java.util.*;


/**
 * Command-line tool that extracts a subset of features from an SVM-formatted
 * file and renormalizes them so the selected values of each row sum to one.
 *
 * <p>Each non-blank input row has the form
 * {@code label index:value index:value ...}. The {@code index} part of every
 * pair is ignored: values are taken positionally, so the first pair on a row
 * is feature 0, the second is feature 1, and so on. Positions for which a row
 * supplies no pair are treated as 0.0.
 *
 * <p>The subset file lists one zero-based feature position per line. For
 * every input row the tool writes the label followed by the selected values,
 * renumbered 1..n in subset-file order. Each value is divided by the sum of
 * the selected values when that sum is positive; otherwise the raw values are
 * written unchanged.
 */
public class GenerateSVMSubsetRenormalize
{
    /**
     * Program entry point.
     *
     * @param args exactly five arguments:
     *             {@code args[0]} subset file,
     *             {@code args[1]} SVM-formatted input file,
     *             {@code args[2]} SVM-formatted output file,
     *             {@code args[3]} number of entries in the subset file,
     *             {@code args[4]} number of features per row in the SVM file
     */
    public static void main(String[] args)
    {
        if (args.length != 5)
        {
            System.out.println("Usage: java GenerateSVMSubsetRenormalize subsetFile svmFormattedInputFile svmFormattedOutputFile numberOfEntriesInSubsetFile numberOfSVMFeatures");
            System.exit(-1);
            return;
        }

        try
        {
            // Parse both counts before touching any file so that a bad
            // argument cannot leave a truncated output file behind.
            int subsetFeatureCount = Integer.parseInt(args[3]);
            int svmFeatureCount = Integer.parseInt(args[4]);

            int[] featureSubsetArray = readFeatureSubset(args[0], subsetFeatureCount, svmFeatureCount);
            writeRenormalizedSubset(args[1], args[2], featureSubsetArray, svmFeatureCount);
        }
        catch (Exception exception)
        {
            System.err.println("Exception in GenerateSVMSubsetRenormalize: " + exception);
            exception.printStackTrace();
            // Signal the failure to calling scripts instead of exiting with 0.
            System.exit(1);
        }
    }

    /**
     * Reads the zero-based feature positions to keep, one per line.
     * Blank lines are skipped.
     *
     * @param subsetFile      path of the subset file
     * @param expectedCount   number of entries the file must contain
     * @param svmFeatureCount number of features per SVM row; every position
     *                        must be in {@code [0, svmFeatureCount)}
     * @return the positions in file order
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if the number of entries differs from
     *                                  {@code expectedCount} or a position is
     *                                  out of range
     * @throws NumberFormatException    if a line is not an integer
     */
    private static int[] readFeatureSubset(String subsetFile, int expectedCount, int svmFeatureCount)
        throws IOException
    {
        int[] subset = new int[expectedCount];
        int count = 0;

        try (BufferedReader reader = new BufferedReader(new FileReader(subsetFile)))
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                String trimmed = line.trim();
                if (trimmed.length() == 0)
                {
                    continue;
                }
                if (count >= expectedCount)
                {
                    throw new IllegalArgumentException("Subset file " + subsetFile
                        + " contains more than the declared " + expectedCount + " entries");
                }
                int position = Integer.parseInt(trimmed);
                if (position < 0 || position >= svmFeatureCount)
                {
                    throw new IllegalArgumentException("Subset feature " + position
                        + " is outside the valid range 0.." + (svmFeatureCount - 1));
                }
                subset[count] = position;
                count = count + 1;
            }
        }

        // A short file would otherwise leave trailing zeros in the array and
        // silently select feature 0 for every missing entry.
        if (count != expectedCount)
        {
            throw new IllegalArgumentException("Subset file " + subsetFile + " contains " + count
                + " entries but " + expectedCount + " were declared");
        }
        return subset;
    }

    /**
     * Copies every non-blank row of the SVM input file to the output file,
     * keeping only the selected features and renormalizing them.
     *
     * @param inputFile       path of the SVM-formatted input file
     * @param outputFile      path of the file to create or overwrite
     * @param subset          zero-based feature positions to keep
     * @param svmFeatureCount maximum number of features on an input row
     * @throws IOException              if reading or writing fails
     * @throws IllegalArgumentException if a row is malformed or has more than
     *                                  {@code svmFeatureCount} features
     * @throws NumberFormatException    if a label or value is not numeric
     */
    private static void writeRenormalizedSubset(String inputFile, String outputFile,
                                                int[] subset, int svmFeatureCount)
        throws IOException
    {
        double[] features = new double[svmFeatureCount];

        // The input is opened first so a missing input file does not
        // create or truncate the output file.
        try (BufferedReader reader = new BufferedReader(new FileReader(inputFile));
             PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(outputFile))))
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                StringTokenizer tokenizer = new StringTokenizer(line);
                if (!tokenizer.hasMoreTokens())
                {
                    continue; // blank line
                }
                int classifierValue = Integer.parseInt(tokenizer.nextToken());

                // Clear the buffer so a row with fewer pairs than the previous
                // one does not inherit that row's values.
                Arrays.fill(features, 0.0);
                int featureCount = 0;
                while (tokenizer.hasMoreTokens())
                {
                    String pair = tokenizer.nextToken();
                    if (featureCount >= svmFeatureCount)
                    {
                        throw new IllegalArgumentException("Row has more than the declared "
                            + svmFeatureCount + " features: " + line);
                    }
                    features[featureCount] = parseFeatureValue(pair);
                    featureCount = featureCount + 1;
                }

                // Sum the values of the features we are interested in.
                double percentageSum = 0;
                for (int i = 0; i < subset.length; i++)
                {
                    percentageSum = percentageSum + features[subset[i]];
                }

                // Print the selected features, renumbered from 1 and
                // renormalized when the sum is positive.
                writer.print(classifierValue);
                for (int i = 0; i < subset.length; i++)
                {
                    double value = features[subset[i]];
                    if (percentageSum > 0)
                    {
                        value = value / percentageSum;
                    }
                    writer.print(" " + (i + 1) + ":" + value);
                }
                writer.println();
            }

            // PrintWriter swallows I/O errors; surface them explicitly.
            if (writer.checkError())
            {
                throw new IOException("Error writing to " + outputFile);
            }
        }
    }

    /**
     * Extracts the numeric value from an {@code index:value} pair.
     *
     * @param pair a single whitespace-delimited token from an SVM row
     * @return the parsed value
     * @throws IllegalArgumentException if the token has no {@code :value} part
     * @throws NumberFormatException    if the value is not a valid double
     */
    private static double parseFeatureValue(String pair)
    {
        StringTokenizer featureTokenizer = new StringTokenizer(pair, ":");
        if (featureTokenizer.countTokens() < 2)
        {
            throw new IllegalArgumentException("Malformed feature \"" + pair + "\"; expected index:value");
        }
        featureTokenizer.nextToken(); // the index label is not used
        return Double.parseDouble(featureTokenizer.nextToken());
    }
}

