/*****************************************************
 Amino Acid Preference Toolkit in Java
 Pathogen Project
 Department of Computer Science and Engineering
 University of South Carolina
 Columbia, SC 29208
 Contact Email: rose@cse.sc.edu
*****************************************************/


import java.io.*;
import java.util.*;

/**
 * Computes the single amino acid composition ("preference") vector of a
 * protein sequence: one slot per amino acid in the order
 * A C D E F G H I K L M N P Q R S T V W Y (indices 0-19).
 *
 * The code 'Z' is mapped to index 20, which is never counted. Any character
 * that has no entry in the lookup table is ignored in the same way.
 */
public class AminoAcidSinglePreference
{

	/** Number of counted amino acids; also the length of every returned vector. */
	private static final int AMINO_ACID_COUNT = 20;

	/** One-letter codes in vector order; the trailing 'Z' receives index 20 and is never counted. */
	private static final String AMINO_ACID_CODES = "ACDEFGHIKLMNPQRSTVWYZ";

	/** Maps a one-letter amino acid code (as a String) to its index in the vector. */
	Hashtable<String, Integer> aminoToIndexMap;
	
	/**
	 * Command line entry point.
	 *
	 * @param args args[0] = amino acid filename (FASTA format),
	 *             args[1] = output filename
	 */
	public static void main(String[] args)
	{

		// fail with a readable message instead of an ArrayIndexOutOfBoundsException
		if (args == null || args.length < 2)
		{
			System.err.println("Usage: java AminoAcidSinglePreference <amino acid FASTA file> <output file>");
			System.exit(1);
			return;
		}

		String aminoFilename = args[0];
		String outputFilename = args[1];

		SequenceReader sequenceReader = new SequenceReader();
		char[] aminoAcidSequence = sequenceReader.parseFastaFile(aminoFilename).toCharArray();
		AminoAcidSinglePreference singlePreference = new AminoAcidSinglePreference();
		double[] singlePreferenceVector = singlePreference.calculateSinglePreferenceVector(aminoAcidSequence);
		writeToFile(singlePreferenceVector, outputFilename);
		
	}


	/**
	 * Writes the vector to a file as one line of space separated values
	 * (the format needed for classification tools, HMM learning, etc).
	 * An empty vector produces an empty file.
	 *
	 * Failures are reported on standard error and are not propagated to
	 * the caller.
	 *
	 * @param singlePreferenceVector values to write
	 * @param outputFilename file to create or overwrite
	 */
	public static void writeToFile(double[] singlePreferenceVector, String outputFilename)
	{
		PrintWriter printWriter = null;
		try
		{
			printWriter = new PrintWriter(new FileWriter(outputFilename));
			int lastIndex = singlePreferenceVector.length - 1;
			for (int i = 0; i <= lastIndex; i++)
			{
				if (i == lastIndex) printWriter.println(singlePreferenceVector[i]);
				else printWriter.print(singlePreferenceVector[i] + " ");
			}

			// PrintWriter never throws on write failures; checkError() flushes
			// and reports whether one happened.
			if (printWriter.checkError())
			{
				System.err.println("Error while writing to " + outputFilename);
			}
		}
		catch (Exception exception)
		{
			// best effort: report the problem and carry on
			exception.printStackTrace();
		}
		finally
		{
			// always release the file handle, even if writing failed
			if (printWriter != null) printWriter.close();
		}

	}

	/**
	 * Builds the lookup table: A=0, C=1, D=2, ..., Y=19, Z=20.
	 */
	public AminoAcidSinglePreference()
	{
		aminoToIndexMap = new Hashtable<String, Integer>();
		for (int i = 0; i < AMINO_ACID_CODES.length(); i++)
		{
			aminoToIndexMap.put(String.valueOf(AMINO_ACID_CODES.charAt(i)), Integer.valueOf(i));
		}
	}


	/**
	 * Calculates the relative frequency of each amino acid in the sequence.
	 * Also prints the sum of the returned values to standard output as a
	 * sanity check.
	 *
	 * @param aminoAcidSequence one-letter amino acid codes
	 * @return vector of length 20 that sums to 1, or a vector of zeros if
	 *         the sequence contains no countable amino acid
	 */
	double[] calculateSinglePreferenceVector(char[] aminoAcidSequence)
	{

		double[] returnVector = countAminoAcids(aminoAcidSequence);
		int i;

		// total number of counted amino acids
		double sum = 0;
		for (i = 0; i < AMINO_ACID_COUNT; i++)
		{
			sum = sum + returnVector[i];
		}

		// normalize; with nothing counted the vector stays all zeros
		// (dividing by zero would fill it with NaN)
		if (sum > 0)
		{
			for (i = 0; i < AMINO_ACID_COUNT; i++)
			{
				returnVector[i] = returnVector[i] / sum;
			}
		}
		
		// verify sum = 1
		double vSum = 0;
		for (i = 0; i < AMINO_ACID_COUNT; i++)
		{
			vSum = vSum + returnVector[i];
		}
		System.out.println("Verification Sum (should = 1): " + vSum);

		return returnVector;
	}

	         
	/**
	 * Calculates the number of occurrences of each amino acid in the
	 * sequence, without normalization.
	 *
	 * @param aminoAcidSequence one-letter amino acid codes
	 * @return vector of length 20 holding the raw counts
	 */
	public double[] calculateSinglePreferenceVectorNoNormalization(char[] aminoAcidSequence)
	{
		return countAminoAcids(aminoAcidSequence);
	}


	/**
	 * Counts the occurrences of each amino acid. Characters without an
	 * entry in the lookup table, and entries whose index lies outside
	 * 0-19 (such as 'Z'), are skipped.
	 */
	private double[] countAminoAcids(char[] aminoAcidSequence)
	{
		// a new double array is already filled with zeros
		double[] counts = new double[AMINO_ACID_COUNT];

		for (int i = 0; i < aminoAcidSequence.length; i++)
		{
			Integer mappedIndex = aminoToIndexMap.get(String.valueOf(aminoAcidSequence[i]));

			// unknown character (e.g. 'X', '*', lowercase): ignore instead of failing
			if (mappedIndex == null) continue;

			int aminoAcidIndex = mappedIndex.intValue();
			if (aminoAcidIndex >= 0 && aminoAcidIndex < AMINO_ACID_COUNT)
			{
				counts[aminoAcidIndex] = counts[aminoAcidIndex] + 1;
			}
		}

		return counts;
	}
}
