/*
 * Decompiled with CFR 0.152.
 */
package com.compomics.dbtoolkit;

import com.compomics.dbtoolkit.io.DBLoaderLoader;
import com.compomics.dbtoolkit.io.interfaces.DBLoader;
import com.compomics.util.protein.Protein;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that looks up peptide sequences in a protein sequence database and
 * reports, for every peptide, the "gene,chromosome" annotations of all database entries
 * whose sequence contains that peptide.
 *
 * <p>Usage: {@code HPP_mapper <peptide_sequence_file> <sequence_database_file> <output_file>}
 *
 * <p>The output file holds one tab-separated line per peptide: the peptide, the number of
 * distinct "gene,chromosome" values found, and those values joined with ';'.
 */
public class HPP_mapper {
    /** Captures the non-whitespace text following " gene:" in a protein header. */
    private static final Pattern GENE_PATTERN = Pattern.compile(".* gene:(\\S*) .*");

    /** Captures the second colon-separated field following " chromosome:" in a protein header. */
    private static final Pattern CHROMOSOME_PATTERN = Pattern.compile(".* chromosome:[^:]*:([^:]*):.*");

    /**
     * Program entry point.
     *
     * @param args exactly three arguments: the peptide file, the sequence database file and
     *             the output file. Anything else prints the usage message and exits.
     */
    public static void main(String[] args) {
        if (args == null || args.length != 3) {
            // printUsage() terminates the JVM, so the code below never sees bad arguments.
            HPP_mapper.printUsage();
        }
        File peps = new File(args[0]);
        if (!peps.exists()) {
            HPP_mapper.printError("The file with peptide sequences you specified ('" + args[0] + "') does not exist!");
        }
        File seqDB = new File(args[1]);
        if (!seqDB.exists()) {
            HPP_mapper.printError("The sequence database file you specified ('" + args[1] + "') does not exist!");
        }
        File output = new File(args[2]);
        try {
            System.err.println("\n\nReading input file '" + args[0] + "'...");
            TreeSet<String> allSeqs = HPP_mapper.readPeptides(peps);
            System.err.println("Retrieved " + allSeqs.size() + " unique sequences from the file.");
            System.err.println("Researching all isoforms for the retrieved sequences in database file '" + args[1] + "'...");
            HashMap<String, Collection<String>> results = HPP_mapper.processSequences(seqDB, allSeqs);
            System.err.println("Writing output to file...");
            HPP_mapper.writeResults(output, results);
            System.err.println("File Written!\n\nJob's done!\n\n");
        }
        catch (Exception e) {
            e.printStackTrace();
            // A failed run must not report success to the calling shell; printError() also exits with 1.
            System.exit(1);
        }
    }

    /**
     * Reads the peptide file. Every line may hold one peptide or several separated by '|'.
     * Each peptide is trimmed; blank entries are dropped because an empty string is a
     * substring of every protein sequence and would therefore match the whole database.
     *
     * @param aFile the peptide input file
     * @return the sorted set of unique, non-empty peptide sequences
     * @throws java.io.IOException when the file cannot be read
     */
    private static TreeSet<String> readPeptides(File aFile) throws java.io.IOException {
        TreeSet<String> sequences = new TreeSet<String>();
        BufferedReader br = new BufferedReader(new FileReader(aFile));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // A line without '|' yields a single token, so one loop covers both layouts.
                StringTokenizer st = new StringTokenizer(line, "|");
                while (st.hasMoreTokens()) {
                    String token = st.nextToken().trim();
                    if (token.length() > 0) {
                        sequences.add(token);
                    }
                }
            }
        } finally {
            br.close();
        }
        return sequences;
    }

    /**
     * Writes one line per peptide: peptide, tab, number of values, tab, values joined by ';'.
     *
     * @param aOutput  the file to (over)write
     * @param aResults peptide to "gene,chromosome" values, as produced by processSequences
     * @throws java.io.IOException when the file cannot be written
     */
    private static void writeResults(File aOutput, HashMap<String, Collection<String>> aResults) throws java.io.IOException {
        BufferedWriter bw = new BufferedWriter(new FileWriter(aOutput));
        try {
            for (String peptide : aResults.keySet()) {
                Collection<String> values = aResults.get(peptide);
                bw.write(peptide + "\t" + values.size() + "\t");
                boolean first = true;
                for (String value : values) {
                    if (!first) {
                        bw.write(";");
                    }
                    first = false;
                    bw.write(value);
                }
                bw.newLine();
            }
            bw.flush();
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            bw.close();
        }
    }

    /**
     * Scans every protein in the database and records, for each peptide that occurs in the
     * protein's sequence, the protein's "gene,chromosome" value.
     *
     * <p>When the gene or chromosome cannot be extracted from the header a warning is printed
     * and the text "null" is used for the missing part, so the protein is still reported.
     *
     * @param aFile      the sequence database file, opened through DBLoaderLoader
     * @param aSequences the peptide sequences to look for
     * @return a map holding an entry for every peptide in aSequences; the value is the sorted
     *         set of distinct "gene,chromosome" strings, empty when the peptide was not found
     * @throws Exception when the database cannot be loaded or read
     */
    private static HashMap<String, Collection<String>> processSequences(File aFile, Collection<String> aSequences) throws Exception {
        HashMap<String, Collection<String>> matches = new HashMap<String, Collection<String>>(aSequences.size());
        TreeSet<String> allGenes = new TreeSet<String>();
        DBLoader loader = DBLoaderLoader.loadDB(aFile);
        try {
            Protein p = null;
            while ((p = loader.nextProtein()) != null) {
                String sequence = p.getSequence().getSequence();
                String rest = p.getHeader().getRest();

                String gene = null;
                Matcher geneMatcher = GENE_PATTERN.matcher(rest);
                if (geneMatcher.matches()) {
                    gene = geneMatcher.group(1);
                } else {
                    System.err.println("  ** no match found for gene in '" + rest + "'!");
                }

                String chromosome = null;
                Matcher chromosomeMatcher = CHROMOSOME_PATTERN.matcher(rest);
                if (chromosomeMatcher.matches()) {
                    chromosome = chromosomeMatcher.group(1);
                } else {
                    System.err.println("  ** no match found for chromosome in '" + rest + "'!");
                }

                // String concatenation renders a missing part as "null" instead of throwing,
                // which new StringBuffer((String) null) would do.
                String value = gene + "," + chromosome;
                for (String s : aSequences) {
                    if (sequence.indexOf(s) < 0) {
                        continue;
                    }
                    Collection<String> c = matches.get(s);
                    if (c == null) {
                        c = new TreeSet<String>();
                        matches.put(s, c);
                    }
                    c.add(value);
                    // Only real gene identifiers count towards the total (and TreeSet rejects null).
                    if (gene != null) {
                        allGenes.add(gene);
                    }
                }
            }
        } finally {
            // Release the database even when reading fails part-way through.
            loader.close();
        }
        System.err.println("Matched " + allGenes.size() + " genes in total.");
        // Make sure unmatched peptides still show up in the output, with zero hits.
        for (String sequence : aSequences) {
            if (!matches.containsKey(sequence)) {
                matches.put(sequence, new TreeSet<String>());
            }
        }
        return matches;
    }

    /** Prints the command-line usage and terminates the JVM with exit status 1. */
    private static void printUsage() {
        HPP_mapper.printError("Usage:\n\n\tHPP_mapper <peptide_sequence_file> <sequence_database_file> <output_file>\n\n");
    }

    /**
     * Prints the message to standard error, padded with blank lines, and terminates the JVM
     * with exit status 1.
     *
     * @param aMsg the message to display
     */
    private static void printError(String aMsg) {
        System.err.println("\n\n" + aMsg + "\n\n");
        System.exit(1);
    }
}

