/*****************************************************
 Amino Acid Preference Toolkit in Java
 Pathogen Project
 Department of Computer Science and Engineering
 University of South Carolina
 Columbia, SC 29208
 Contact Email: rose@cse.sc.edu
*****************************************************/

import java.io.*;
import java.util.*;

/**
 * Builds bootstrapped amino-acid-triple preference vectors from randomly chosen genomes.
 *
 * For every requested item the constructor picks one of the listed genomes at random,
 * translates its genes, collects every overlapping amino acid triple, and then samples
 * triples (with replacement) into a normalised 8000-element frequency vector.  Each
 * vector is written as one row of the form "0 1:v1 2:v2 ... 8000:v8000", and three
 * companion files record the item's type, name and source file.
 */
public class NewSelectRandomTriples
{
    /** Number of amino acids that take part in the triple index (the stop symbol 'Z' does not). */
    private static final int AMINO_ACID_COUNT = 20;

    /** Length of the preference vector: one slot per ordered triple of the 20 amino acids. */
    private static final int TRIPLE_COUNT = AMINO_ACID_COUNT * AMINO_ACID_COUNT * AMINO_ACID_COUNT;

    /** Sampling gives up after this many draws that cannot be indexed. */
    private static final int MAX_BAD_ATTEMPTS = 1000;

    /** One-letter codes in index order; position in this string is the value stored in aminoToIndexMap. */
    private static final String AMINO_ACID_CODES = "ACDEFGHIKLMNPQRSTVWYZ";

    /** Each row is {amino acid code, codon, codon, ...}; 'Z' marks the three stop codons. */
    private static final String[][] CODON_TABLE =
    {
        {"R", "CGA", "CGC", "CGG", "CGT", "AGA", "AGG"},
        {"L", "CTA", "CTC", "CTG", "CTT", "TTA", "TTG"},
        {"S", "TCA", "TCC", "TCG", "TCT", "AGC", "AGT"},
        {"T", "ACA", "ACC", "ACG", "ACT"},
        {"P", "CCA", "CCC", "CCG", "CCT"},
        {"A", "GCA", "GCC", "GCG", "GCT"},
        {"G", "GGA", "GGC", "GGG", "GGT"},
        {"V", "GTA", "GTC", "GTG", "GTT"},
        {"K", "AAA", "AAG"},
        {"N", "AAC", "AAT"},
        {"Q", "CAA", "CAG"},
        {"H", "CAC", "CAT"},
        {"E", "GAA", "GAG"},
        {"D", "GAC", "GAT"},
        {"Y", "TAC", "TAT"},
        {"C", "TGC", "TGT"},
        {"F", "TTC", "TTT"},
        {"I", "ATA", "ATC", "ATT"},
        {"M", "ATG"},
        {"W", "TGG"},
        {"Z", "TAA", "TAG", "TGA"}
    };

    // Both tables are built at class-load time so that the static getIndexForTriple()
    // works even when no instance has been constructed yet.

    /** Maps a three-letter DNA codon (String) to its one-letter amino acid code (String). */
    public static Hashtable codonToAminoAcidMap = buildCodonToAminoAcidMap();

    /** Maps a one-letter amino acid code (String) to its index (Integer, 0-20; 20 is the stop symbol 'Z'). */
    public static Hashtable aminoToIndexMap = buildAminoToIndexMap();

    /**
     * Command-line entry point.  Any exception raised while generating the data is
     * reported with a stack trace rather than propagated.
     *
     * @param args see the list at the top of the method body
     */
    public static void main(String[] args)
    {
        //args[0] = fasta filename
        //args[1] = gene filename
        //args[2] = type filename
        //args[3] = number of files input
        //args[4] = number of random items to produce
        //args[5] = size of random items to produce
        //args[6] = output directory name

        if (args.length < 7)
        {
            System.err.println("Usage: java NewSelectRandomTriples <fastaListFile> <geneListFile> <typeFile>"
                               + " <numberOfFiles> <numberOfRandomItems> <sizeOfRandomItems> <outputDirectory>");
            return;
        }

        try
        {
            int sizeOfRandomSegment = Integer.parseInt(args[5]);
            new NewSelectRandomTriples(args[0], args[1], args[2], Integer.parseInt(args[3]),
                                       Integer.parseInt(args[4]), sizeOfRandomSegment, args[6]);
        }
        catch (Exception exception)
        {
            exception.printStackTrace();
        }
    }

    /**
     * Generates the random test items and writes the four output files into
     * outputDirectory.  All work happens inside this constructor.
     *
     * @param fastaFileName          file listing one sequence file name per line
     * @param geneFileName           file listing the matching gene file name per line
     * @param typeFileName           file with one "name:type" line per input file
     * @param numberOfFilesInput     number of lines to read from each of the three list files
     * @param numberOfRandomSegments number of random items (output rows) to produce
     * @param sizeOfRandomSegment    number of triples sampled for each item
     * @param outputDirectory        directory that receives the output files
     * @throws IOException              if a list file is shorter than numberOfFilesInput, a type
     *                                  line has no ":"-separated second field, or file I/O fails
     * @throws IllegalArgumentException if triples are requested from a genome that yields none
     * @throws Exception                anything raised by the sequence/gene helper classes
     */
    public NewSelectRandomTriples(String fastaFileName, String geneFileName, String typeFileName,int numberOfFilesInput, int numberOfRandomSegments,int sizeOfRandomSegment, String outputDirectory) throws Exception
    {
        // The constructor has always replaced the shared tables with fresh ones; keep
        // doing so in case other code modified them since class load.
        codonToAminoAcidMap = buildCodonToAminoAcidMap();
        aminoToIndexMap = buildAminoToIndexMap();

        SequenceReader sequenceReader = new SequenceReader();
        SequenceUtilities sequenceUtilities = new SequenceUtilities();

        System.out.println("Looking for " + sizeOfRandomSegment + " aminoTriples");

        // the three list files are parallel: line i of each describes input file i
        String[] sequenceFilenames = readLines(fastaFileName, numberOfFilesInput);
        String[] geneFilenames = readLines(geneFileName, numberOfFilesInput);
        String[] types = readTypes(typeFileName, numberOfFilesInput);

        Random random = new Random();

        PrintWriter typeWriter = null;
        PrintWriter sequenceNamesWriter = null;
        PrintWriter testToTrueWriter = null;
        PrintWriter testingDataFileWriter = null;

        try
        {
            typeWriter = new PrintWriter(new FileWriter(outputDirectory + "/randomClassificationTest.types"));
            sequenceNamesWriter = new PrintWriter(new FileWriter(outputDirectory + "/classificationTestSequenceNames"));
            testToTrueWriter = new PrintWriter(new FileWriter(outputDirectory + "/randomClassificationTest.testToTrue"));
            testingDataFileWriter = new PrintWriter(new FileWriter(outputDirectory + "/randomClassificationTestGroup"));

            // NOTE(review): the instance itself is never used here.  The construction is
            // kept because its constructor may have side effects the rest of the toolkit
            // relies on -- confirm before removing.
            new AminoAcidTriplePreference();

            double[] triplePreferenceArray = new double[TRIPLE_COUNT];

            // pool of every triple found in the currently selected genome
            Vector aminoTriples = new Vector();

            for (int segmentCount = 0; segmentCount < numberOfRandomSegments; segmentCount++)
            {
                // reinit everything
                aminoTriples.clear();
                Arrays.fill(triplePreferenceArray, 0.0);

                System.out.println("Working on sequence: " + segmentCount);

                // pick a random test file
                int fileToChoose = random.nextInt(numberOfFilesInput);
                String sequenceFilename = sequenceFilenames[fileToChoose];
                String geneFilename = geneFilenames[fileToChoose];
                String emFileTypes = types[fileToChoose];

                // "> 0" (not ">= 0") is deliberate-as-found: a name that *starts* with
                // "_MS" is still treated as a single-segment genome.
                if (sequenceFilename.indexOf("_MS") > 0)
                {
                    collectMultiSegmentTriples(sequenceFilename, geneFilename, sequenceReader, sequenceUtilities, aminoTriples);
                }
                else
                {
                    System.out.println("Working on single segment genome: " + sequenceFilename);
                    collectTriples(sequenceFilename, geneFilename, sequenceReader, sequenceUtilities, aminoTriples);
                }

                // select appropriate number of triples for bootstrapped data
                fillPreferenceVector(aminoTriples, sizeOfRandomSegment, random, triplePreferenceArray, sequenceFilename);

                // one output row: label "0" followed by 1-based "feature:value" pairs
                testingDataFileWriter.print("0");
                for (int feature = 0; feature < triplePreferenceArray.length; feature++)
                {
                    testingDataFileWriter.print(" " + (feature + 1) + ":" + triplePreferenceArray[feature]);
                }
                testingDataFileWriter.println();

                typeWriter.println("randomClassificationTest" + segmentCount + ".triplePreference:" + emFileTypes);
                sequenceNamesWriter.println("randomClassificationTest" + segmentCount + ".triplePreference");
                testToTrueWriter.println("randomClassificationTest" + segmentCount + ":" + sequenceFilename);
                testToTrueWriter.flush();

                System.out.println("Just generated test sequence: " + segmentCount);
                System.out.println();
                System.out.println();
            } // for all random segments needed
        }
        finally
        {
            // close (and thereby flush) whatever was opened, even when generation failed
            closeWriter(typeWriter);
            closeWriter(sequenceNamesWriter);
            closeWriter(testToTrueWriter);
            closeWriter(testingDataFileWriter);
        }
    } // constructor

    /**
     * Converts an amino acid triple into its slot in the 8000-element preference vector,
     * computed as 400 * first + 20 * second + third over the 20 amino acid indices.
     * Only the first three characters are examined.
     *
     * @param aminoAcidTriple one-letter amino acid codes, e.g. "ACD"
     * @return the index in the range 0..7999, or -1 when the triple cannot be indexed
     *         (null, shorter than three characters, contains the stop symbol 'Z', or
     *         contains a letter that is not in aminoToIndexMap)
     */
    public static int getIndexForTriple(String aminoAcidTriple)
    {
        if (aminoAcidTriple == null || aminoAcidTriple.length() < 3)
        {
            return -1;
        }

        int tripleIndex = 0;
        for (int position = 0; position < 3; position++)
        {
            Integer aminoIndex = (Integer) aminoToIndexMap.get(String.valueOf(aminoAcidTriple.charAt(position)));

            // 'Z' (index 20) does not fit a base-20 digit: letting it through would
            // collide with another triple or run past the end of the vector.
            if (aminoIndex == null || aminoIndex.intValue() < 0 || aminoIndex.intValue() >= AMINO_ACID_COUNT)
            {
                return -1;
            }
            tripleIndex = (tripleIndex * AMINO_ACID_COUNT) + aminoIndex.intValue();
        }
        return tripleIndex;
    }

    /** Builds a fresh codon -> amino acid table from CODON_TABLE. */
    private static Hashtable buildCodonToAminoAcidMap()
    {
        Hashtable map = new Hashtable();
        for (int row = 0; row < CODON_TABLE.length; row++)
        {
            String aminoAcid = CODON_TABLE[row][0];
            for (int column = 1; column < CODON_TABLE[row].length; column++)
            {
                map.put(CODON_TABLE[row][column], aminoAcid);
            }
        }
        return map;
    }

    /** Builds a fresh amino acid -> index table from AMINO_ACID_CODES. */
    private static Hashtable buildAminoToIndexMap()
    {
        Hashtable map = new Hashtable();
        for (int index = 0; index < AMINO_ACID_CODES.length(); index++)
        {
            map.put(String.valueOf(AMINO_ACID_CODES.charAt(index)), Integer.valueOf(index));
        }
        return map;
    }

    /** Reads exactly count lines from fileName; fails if the file ends early. */
    private static String[] readLines(String fileName, int count) throws IOException
    {
        String[] lines = new String[count];
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try
        {
            for (int k = 0; k < count; k++)
            {
                lines[k] = reader.readLine();
                if (lines[k] == null)
                {
                    throw new IOException(fileName + " has only " + k + " lines; expected " + count);
                }
            }
        }
        finally
        {
            reader.close();
        }
        return lines;
    }

    /** Reads every line of fileName, in file order, into a Vector of Strings. */
    private static Vector readAllLines(String fileName) throws IOException
    {
        Vector lines = new Vector();
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        try
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                lines.add(line);
            }
        }
        finally
        {
            reader.close();
        }
        return lines;
    }

    /** Reads count "name:type" lines and returns the second ":"-separated token of each. */
    private static String[] readTypes(String typeFileName, int count) throws IOException
    {
        String[] lines = readLines(typeFileName, count);
        String[] types = new String[count];
        for (int k = 0; k < count; k++)
        {
            StringTokenizer tokenizer = new StringTokenizer(lines[k], ":");
            if (tokenizer.countTokens() < 2)
            {
                throw new IOException(typeFileName + " line " + (k + 1) + " is not of the form name:type: " + lines[k]);
            }
            tokenizer.nextToken(); // skip the name
            types[k] = tokenizer.nextToken();
        }
        return types;
    }

    /**
     * Handles a multi-segment genome: both arguments name list files whose lines pair up
     * (line k of the FASTA list with line k of the gene list).  Every segment is processed
     * like a single-segment genome and its triples are added to the same pool.  Segments
     * are visited in file order; extra gene-list lines are ignored.
     */
    private static void collectMultiSegmentTriples(String fastaListFilename, String geneListFilename, SequenceReader sequenceReader, SequenceUtilities sequenceUtilities, Vector aminoTriples) throws Exception
    {
        Vector fastaFilenames = readAllLines(fastaListFilename);
        Vector geneFilenames = readAllLines(geneListFilename);
        if (geneFilenames.size() < fastaFilenames.size())
        {
            throw new IOException(geneListFilename + " lists " + geneFilenames.size() + " gene files but "
                                  + fastaListFilename + " lists " + fastaFilenames.size() + " sequence files");
        }

        for (int segment = 0; segment < fastaFilenames.size(); segment++)
        {
            String fastaFilename = (String) fastaFilenames.elementAt(segment);
            String geneFilename = (String) geneFilenames.elementAt(segment);
            System.out.println("Working on multi-segment segment: " + fastaFilename + "," + geneFilename);
            collectTriples(fastaFilename, geneFilename, sequenceReader, sequenceUtilities, aminoTriples);
        }
    }

    /**
     * Translates every gene of one sequence file and appends all of its overlapping
     * amino acid triples to aminoTriples.
     *
     * Each gene-array row is read as {start, stop, strand, continues}, with 1-based
     * inclusive coordinates.  Rows are concatenated into one coding sequence until a row
     * with continues == 1 is reached, at which point the sequence is translated.
     */
    private static void collectTriples(String fastaFilename, String geneFilename, SequenceReader sequenceReader, SequenceUtilities sequenceUtilities, Vector aminoTriples) throws Exception
    {
        char[] sequence = sequenceReader.parseFastaFile(fastaFilename).toCharArray();
        char[] complementSequence = sequenceUtilities.complementSequence(sequence);
        int sequenceLength = sequence.length;
        ExtractGenes extractGenesObject = new ExtractGenes(geneFilename);
        int[][] geneIndex = extractGenesObject.getGeneArray();
        int numberOfGenes = extractGenesObject.getNumberOfGenes();

        StringBuffer currentCodonSequenceBuffer = new StringBuffer();

        for (int geneCount = 0; geneCount < numberOfGenes; geneCount++)
        {
            int geneStart = geneIndex[geneCount][0];
            int geneStop = geneIndex[geneCount][1];
            int geneStrand = geneIndex[geneCount][2];
            int geneContinues = geneIndex[geneCount][3];
            int i;

            if (geneStrand == 0)
            {
                if (geneStart > 0)
                {
                    for (i = (geneStart - 1); i < geneStop; i++)
                    {
                        currentCodonSequenceBuffer.append(sequence[i]);
                    }
                }
                else
                {
                    // non-positive start: take the tail of the sequence, then wrap
                    // around to the beginning up to geneStop
                    for (i = sequenceLength + geneStart; i < sequenceLength; i++)
                    {
                        currentCodonSequenceBuffer.append(sequence[i]);
                    }
                    for (i = 0; i < geneStop; i++)
                    {
                        currentCodonSequenceBuffer.append(sequence[i]);
                    }
                }
            }
            else // complement strand
            {
                if (geneStart > 0)
                {
                    // read the complement backwards and put it in FRONT of whatever
                    // has been accumulated so far for this gene
                    StringBuffer newSequenceBuffer = new StringBuffer();
                    for (i = (geneStop - 1); i >= (geneStart - 1); i--)
                    {
                        newSequenceBuffer.append(complementSequence[i]);
                    }
                    currentCodonSequenceBuffer.insert(0, newSequenceBuffer.toString());
                }
                else
                {
                    System.out.println("This case should never occur!");
                }
            }

            if (geneContinues == 1) // we are not continuing, so compute aminos
            {
                char[] codonSequence = currentCodonSequenceBuffer.toString().toCharArray();
                char[] aminoAcidSequence = sequenceUtilities.translateToAminoAcidSequence(codonSequence, 0, codonSequence.length - 1);

                for (int al = 0; al < aminoAcidSequence.length - 2; al++)
                {
                    aminoTriples.add(new String(aminoAcidSequence, al, 3));
                }
                currentCodonSequenceBuffer.setLength(0);
            }
        }
    }

    /**
     * Draws triples at random (with replacement) from aminoTriples until
     * sizeOfRandomSegment usable ones have been counted or MAX_BAD_ATTEMPTS draws were
     * rejected, then normalises the counts in triplePreferenceArray to frequencies.
     * A draw is rejected when getIndexForTriple() cannot index it.
     *
     * @param sourceName used only in diagnostics
     * @throws IllegalArgumentException if triples are requested but the pool is empty
     */
    private static void fillPreferenceVector(Vector aminoTriples, int sizeOfRandomSegment, Random random, double[] triplePreferenceArray, String sourceName)
    {
        int numberOfTriples = aminoTriples.size();
        System.out.println("Counted " + numberOfTriples + " triples");

        if (numberOfTriples == 0 && sizeOfRandomSegment > 0)
        {
            // Random.nextInt(0) would throw the same exception type, without context
            throw new IllegalArgumentException("No amino acid triples could be extracted from " + sourceName);
        }

        double sum = 0;
        int currentFound = 0;
        int badAttempts = 0;
        while ((currentFound < sizeOfRandomSegment) && (badAttempts < MAX_BAD_ATTEMPTS))
        {
            String aminoAcidTriple = (String) aminoTriples.elementAt(random.nextInt(numberOfTriples));
            int tripleIndex = getIndexForTriple(aminoAcidTriple);
            if (tripleIndex < 0)
            {
                badAttempts = badAttempts + 1;
                continue;
            }
            triplePreferenceArray[tripleIndex] = triplePreferenceArray[tripleIndex] + 1;
            sum = sum + 1;
            currentFound = currentFound + 1;
        }
        System.out.println("Found: " + sum + " triples");

        if (sum > 0)
        {
            for (int j = 0; j < triplePreferenceArray.length; j++)
            {
                triplePreferenceArray[j] = triplePreferenceArray[j] / sum;
            }
        }
        else
        {
            // dividing by zero would put NaN into every feature of the output row
            System.out.println("Warning: no usable triples were sampled from " + sourceName + "; writing an all-zero vector");
        }
    }

    /** Closes writer if it was opened; PrintWriter.close() also flushes. */
    private static void closeWriter(PrintWriter writer)
    {
        if (writer != null)
        {
            writer.close();
        }
    }
}  // class
