/*
 * Decompiled with CFR 0.152.
 */
package pacBio;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import misc.FileManager;
import misc.SystemCommand;
import pacBio.OneMatch;
import pacBio.PrimerPair;
import pacBio.RemoveAmpicillin;

/**
 * Command-line pipeline that strips the ampicillin marker and vector sequence
 * from reads and groups ("demultiplexes") the reads by primer pair.
 *
 * <p>Steps performed by {@link #main(String[])}:
 * <ol>
 *   <li>load the reads and align them against the ampicillin reference;</li>
 *   <li>cut the ampicillin hit out of every usable read and assign the read to
 *       a cluster by aligning the sequence around the hit against a
 *       "forward primer + ampicillin + reverse-complemented reverse primer"
 *       reference built for every primer pair;</li>
 *   <li>per cluster, align the trimmed reads against the vector, mask the hits
 *       and keep the longest unmasked piece of each read;</li>
 *   <li>write one FASTA file per cluster.</li>
 * </ol>
 *
 * <p>Alignment is delegated to the external {@code cross_match} program.
 * The static fields are left non-final and package-visible so that existing
 * code that reads or tunes them keeps working.
 */
public class removeAmpicillinAndVectorAndDemultiplex {
    /** Not referenced within this class. */
    static String LABEL = "Ampicillin-promoter";
    /** Substrings that identify a primer hit line in the cross_match output. */
    static String LABEL0 = "NHP";
    static String LABEL1 = "CIB";
    static String LABEL2 = "KIR";
    /** Inserted in place of an ampicillin hit that lies inside a read. */
    static String MARKER = "NN";
    /** Character used to mask vector hits; also used as a regex when splitting. */
    static String MASKER = "X";
    /** Reads with only a partial ampicillin hit must be at least this long. */
    static int MIN_SIZE = 2000;
    /** Number of read bases kept on each side of the ampicillin hit for clustering. */
    static int FLANKING_SIZE = 55;
    /** Number of bases removed from both ends of every sequence that is written out. */
    static int TRIM_SIZE = 50;
    /** Not referenced within this class. */
    static int VECTOR_BUFFER_SIZE = 25;

    /**
     * Hits whose query span is at least this long take the "hit inside the read" path.
     * NOTE(review): presumably close to the ampicillin reference length - confirm.
     */
    private static final int MIN_FULL_MATCH_SIZE = 940;
    /** How close (in bases) a partial hit must be to a read/query edge to be accepted. */
    private static final int EDGE_TOLERANCE = 10;

    /**
     * Runs the whole pipeline.
     *
     * @param args {@code inputFasta ampFasta vectorFasta primerFasta outputDir}
     * @throws IllegalArgumentException if fewer than five arguments are given
     * @throws Exception if any alignment or file step fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 5) {
            throw new IllegalArgumentException("Usage: removeAmpicillinAndVectorAndDemultiplex"
                    + " <inputFasta> <ampFasta> <vectorFasta> <primerFasta> <outputDir>");
        }
        File inputFasta = new File(args[0]);
        File ampFasta = new File(args[1]);
        File vectorFasta = new File(args[2]);
        File primerFasta = new File(args[3]);
        File outputDir = new File(args[4]);

        HashMap<String, String> sequenceMap = new HashMap<String, String>();
        HashMap<String, String> nameIdMap = new HashMap<String, String>();
        RemoveAmpicillin.getSequenceFromFasta(inputFasta, sequenceMap, nameIdMap);

        System.out.println("alignByCrossmatch for ampicillin");
        File matchResult = RemoveAmpicillin.alignByCrossmatch(inputFasta, ampFasta);
        System.out.println("parseCMresult ");
        Map<String, List<OneMatch>> hitMap = RemoveAmpicillin.parseCMresult(matchResult, RemoveAmpicillin.AMP_LABEL);

        System.out.println("alignByCrossmatch for ampicillin+primer");
        File ampPlusPrimerFastaFile = contructAmpPlusPrimerFastaFile(ampFasta, primerFasta);

        System.out.println("trimAmpicillin and clustering");
        HashMap<String, List<String>> clusteredReads = new HashMap<String, List<String>>();
        Map<String, String> trimedAmpSequenceMap = trimAmpicillin(sequenceMap, hitMap, clusteredReads, ampPlusPrimerFastaFile);
        System.out.println("input sequence size=" + sequenceMap.size());
        System.out.println("Ampicillin cleaned and filtered sequence size=" + trimedAmpSequenceMap.size());

        System.out.println("trimVector");
        Map<String, String> trimedVectorSequenceMap = trimVector(clusteredReads, trimedAmpSequenceMap, vectorFasta);
        System.out.println("Vector cleaned and filtered sequence size=" + trimedVectorSequenceMap.size());

        System.out.println("writeFastaFile ");
        writeFastaFile(nameIdMap, trimedVectorSequenceMap, clusteredReads, outputDir);
    }

    /**
     * Removes vector sequence from the reads of every cluster.
     *
     * <p>For each cluster the ampicillin-trimmed reads are written to a temporary
     * FASTA file, aligned against {@code vectorFasta}, and passed to
     * {@link #trimVectorSequences}.
     *
     * @param clusteredReads cluster name to read names
     * @param trimedAmpSequenceMap read name to ampicillin-trimmed sequence
     * @param vectorFasta vector reference
     * @return read name to vector-trimmed sequence, for all clusters combined
     */
    private static Map<String, String> trimVector(Map<String, List<String>> clusteredReads, Map<String, String> trimedAmpSequenceMap, File vectorFasta) throws Exception {
        HashMap<String, String> trimedVectorSequenceMap = new HashMap<String, String>();
        for (Map.Entry<String, List<String>> cluster : clusteredReads.entrySet()) {
            String fosmid = cluster.getKey();
            List<String> readNames = cluster.getValue();
            System.out.println("\ttrimVector for " + fosmid + " readsCount=" + readNames.size());

            StringBuilder fasta = new StringBuilder();
            for (String seqName : readNames) {
                if (!trimedAmpSequenceMap.containsKey(seqName)) {
                    continue;
                }
                fasta.append(">").append(seqName).append("\n");
                fasta.append(trimedAmpSequenceMap.get(seqName)).append("\n");
            }

            File tmpFasta = File.createTempFile("seq", "fa");
            Map<String, List<OneMatch>> hitMap;
            try {
                FileManager.writeTextFile(tmpFasta, fasta.toString(), true);
                File matchResult = RemoveAmpicillin.alignByCrossmatch(tmpFasta, vectorFasta);
                hitMap = RemoveAmpicillin.parseCMresult(matchResult, RemoveAmpicillin.VECTOR_LABEL);
            } finally {
                // The per-cluster input is no longer needed once the hits are parsed.
                tmpFasta.delete();
            }
            System.out.println("trimVector hitMap.size()=" + hitMap.size());

            Map<String, String> fosmidTrimedVecotrSequenceMap = trimVectorSequences(readNames, trimedAmpSequenceMap, hitMap);
            System.out.println("\tdone trimVector for " + fosmid + " readsCount=" + fosmidTrimedVecotrSequenceMap.size());
            addToMap(fosmidTrimedVecotrSequenceMap, trimedVectorSequenceMap);
        }
        return trimedVectorSequenceMap;
    }

    /** Copies every entry of the first map into the second, overwriting existing keys. */
    private static void addToMap(Map<String, String> fosmidTrimedVecotrSequenceMap, Map<String, String> trimedVectorSequenceMap) {
        trimedVectorSequenceMap.putAll(fosmidTrimedVecotrSequenceMap);
    }

    /**
     * Masks every vector hit of each read and keeps the longest unmasked piece.
     *
     * <p>Reads without hits are passed through unchanged. Reads that are masked
     * completely, and reads that have no sequence in {@code trimedAmpSequenceMap},
     * are left out of the result.
     *
     * @param clusterNames names of the reads to process
     * @param trimedAmpSequenceMap read name to ampicillin-trimmed sequence
     * @param hitMap read name to vector hits; hit coordinates are taken from
     *        {@code getTargetStart()}/{@code getTargetEnd()}
     * @return read name to vector-free sequence
     */
    private static Map<String, String> trimVectorSequences(List<String> clusterNames, Map<String, String> trimedAmpSequenceMap, Map<String, List<OneMatch>> hitMap) {
        HashMap<String, String> trimedSequenceMap = new HashMap<String, String>();
        for (String readName : clusterNames) {
            String sequence = trimedAmpSequenceMap.get(readName);
            if (sequence == null) {
                continue;
            }
            List<OneMatch> matches = hitMap.get(readName);
            if (matches == null) {
                trimedSequenceMap.put(readName, sequence);
                continue;
            }
            String masked = sequence;
            for (OneMatch oneMatch : matches) {
                masked = MaskSeq(masked, oneMatch.getTargetStart(), oneMatch.getTargetEnd());
            }
            String longestChunk = getLongestChunk(masked);
            // getLongestChunk reports "nothing left" as an empty string, never null.
            if (longestChunk == null || longestChunk.length() == 0) {
                continue;
            }
            trimedSequenceMap.put(readName, longestChunk);
            System.out.println("trimVectorSequences " + readName + "before and afterTrimSize=" + masked.length() + " " + longestChunk.length());
        }
        return trimedSequenceMap;
    }

    /**
     * Splits a masked sequence on runs of {@link #MASKER} and returns the longest
     * piece (the first one on ties).
     *
     * @return the longest unmasked piece, or an empty string if there is none
     */
    private static String getLongestChunk(String newSequence) {
        String longestPiece = "";
        for (String seqPiece : newSequence.split(MASKER + "+")) {
            if (seqPiece.length() > longestPiece.length()) {
                longestPiece = seqPiece;
            }
        }
        return longestPiece;
    }

    /**
     * Replaces the bases {@code start..end} (1-based, inclusive) with
     * {@link #MASKER} characters. The result always has the same length as the
     * input, so coordinates of further hits on the same read stay valid.
     *
     * @param sequence sequence to mask
     * @param start first masked position; swapped with {@code end} if larger
     * @param end last masked position; clamped to the sequence length
     * @return the masked sequence
     */
    private static String MaskSeq(String sequence, int start, int end) {
        int length = sequence.length();
        int from = Math.max(Math.min(start, end), 1);
        int to = Math.min(Math.max(start, end), length);
        if (from > to) {
            // The range lies completely outside the sequence.
            return sequence;
        }
        return sequence.substring(0, from - 1) + makeXs(to - from + 1) + sequence.substring(to);
    }

    /** Returns {@code count} copies of {@link #MASKER}; empty for {@code count <= 0}. */
    private static String makeXs(int count) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < count; i++) {
            builder.append(MASKER);
        }
        return builder.toString();
    }

    /**
     * Writes one {@code <cluster>.fasta} file per cluster into {@code outputDir}.
     *
     * <p>{@link #TRIM_SIZE} bases are cut from both ends of every sequence; reads
     * that would be empty after that cut (or that have no trimmed sequence) are
     * skipped. Reads that could not be assigned to a primer pair are stored under
     * the {@code null} cluster key and therefore end up in {@code null.fasta}.
     *
     * @param nameIdMap read name to the id printed in the FASTA header
     * @param trimedSequenceMap read name to trimmed sequence
     * @param clusteredReads cluster name to read names
     * @param outputDir directory that receives the files
     */
    private static void writeFastaFile(Map<String, String> nameIdMap, Map<String, String> trimedSequenceMap, Map<String, List<String>> clusteredReads, File outputDir) {
        System.out.println("output fosmid files=" + clusteredReads.size());
        for (Map.Entry<String, List<String>> cluster : clusteredReads.entrySet()) {
            String fosmid = cluster.getKey();
            StringBuilder builder = new StringBuilder();
            int written = 0;
            for (String seqName : cluster.getValue()) {
                String sequence = trimedSequenceMap.get(seqName);
                // "<=" so that a read of exactly 2 * TRIM_SIZE is not written as an empty record.
                if (sequence == null || sequence.length() <= 2 * TRIM_SIZE) {
                    continue;
                }
                builder.append(">").append(seqName).append(" id=").append(nameIdMap.get(seqName)).append("\n");
                builder.append(sequence, TRIM_SIZE, sequence.length() - TRIM_SIZE).append("\n");
                ++written;
            }
            System.out.println("\t" + fosmid + " " + written);
            FileManager.writeTextFile(new File(outputDir, fosmid + ".fasta"), builder.toString(), true);
        }
    }

    /**
     * Cuts the ampicillin hit out of every usable read and assigns the read to a
     * cluster.
     *
     * <p>A read is used only when it has exactly one hit. Read positions come from
     * the hit's target coordinates, the span on the reference from its query
     * coordinates.
     * <ul>
     *   <li>Query span of at least {@value #MIN_FULL_MATCH_SIZE}: the hit is
     *       replaced by {@link #MARKER} and both flanks are used for clustering.</li>
     *   <li>Shorter span: the read must be at least {@link #MIN_SIZE} long and the
     *       hit must touch one end of the read (within
     *       {@value #EDGE_TOLERANCE} bases) in an orientation consistent with the
     *       part of the query that matched; the hit is cut off and the remaining
     *       flank is used for clustering. Anything else is reported as unusable.</li>
     * </ul>
     *
     * @param sequenceMap read name to raw sequence
     * @param hitMap read name to ampicillin hits
     * @param clusteredReads output: cluster name to read names
     * @param ampPlusPrimerFastaFile reference used to pick the cluster
     * @return read name to ampicillin-trimmed sequence
     */
    private static Map<String, String> trimAmpicillin(Map<String, String> sequenceMap, Map<String, List<OneMatch>> hitMap, Map<String, List<String>> clusteredReads, File ampPlusPrimerFastaFile) throws Exception {
        HashMap<String, String> trimedSequenceMap = new HashMap<String, String>();
        for (Map.Entry<String, String> entry : sequenceMap.entrySet()) {
            String readName = entry.getKey();
            String read = entry.getValue();
            List<OneMatch> matches = hitMap.get(readName);
            if (matches == null || matches.isEmpty()) {
                continue;
            }
            if (matches.size() > 1) {
                System.out.println("can't use " + readName + " matches count=" + matches.size());
                continue;
            }
            OneMatch aMatch = matches.get(0);
            int matchSize = aMatch.getQueryEnd() - aMatch.getQueryStart();
            int seqLen = read.length();
            int targetStart = aMatch.getTargetStart();
            int targetEnd = aMatch.getTargetEnd();
            // Cut positions, clamped so that a hit reported at the very edge cannot throw.
            int cutStart = Math.max(0, targetStart - 1);
            int cutEnd = Math.min(targetEnd, seqLen);
            int flankStart = Math.max(0, targetStart - FLANKING_SIZE);
            int flankEnd = Math.min(targetEnd + FLANKING_SIZE, seqLen);

            String trimmed;
            String seqForCluster;
            if (matchSize >= MIN_FULL_MATCH_SIZE) {
                trimmed = read.substring(0, cutStart) + MARKER + read.substring(cutEnd);
                seqForCluster = read.substring(flankStart, flankEnd);
            } else {
                if (seqLen < MIN_SIZE) {
                    continue;
                }
                boolean hitNearReadStart = targetStart <= EDGE_TOLERANCE;
                boolean hitNearReadEnd = targetEnd >= seqLen - EDGE_TOLERANCE;
                boolean reverse = aMatch.isReverseComp;
                String tag = null;
                boolean ampAtEnd = false;
                boolean ampAtBeginning = false;
                if (aMatch.getQueryEnd() >= MIN_FULL_MATCH_SIZE) {
                    tag = "A";
                    ampAtEnd = reverse && hitNearReadEnd;
                    ampAtBeginning = !reverse && hitNearReadStart;
                } else if (aMatch.getQueryStart() < EDGE_TOLERANCE) {
                    tag = "B";
                    ampAtBeginning = reverse && hitNearReadStart;
                    ampAtEnd = !reverse && hitNearReadEnd;
                }
                if (ampAtEnd) {
                    trimmed = read.substring(0, cutStart);
                    System.out.println("amp at end " + tag + readName + " afterTrimSize=" + trimmed.length());
                    seqForCluster = read.substring(flankStart, cutEnd);
                } else if (ampAtBeginning) {
                    trimmed = read.substring(cutEnd);
                    System.out.println("amp at beginning " + tag + readName + " afterTrimSize=" + trimmed.length());
                    seqForCluster = read.substring(0, flankEnd);
                } else {
                    System.out.println("can't use " + readName + " matchSize=" + matchSize);
                    continue;
                }
            }
            trimedSequenceMap.put(readName, trimmed);
            String clusterGroup = matchForClusterGroup(seqForCluster, ampPlusPrimerFastaFile);
            addClusteredReads(clusterGroup, readName, clusteredReads);
        }
        return trimedSequenceMap;
    }

    /**
     * Appends {@code readName} to the list of {@code clusterGroup}, creating the
     * list on first use. {@code clusterGroup} may be {@code null} (no primer hit).
     */
    private static void addClusteredReads(String clusterGroup, String readName, Map<String, List<String>> clusteredReads) {
        List<String> reads = clusteredReads.get(clusterGroup);
        if (reads == null) {
            reads = new ArrayList<String>();
            clusteredReads.put(clusterGroup, reads);
        }
        reads.add(readName);
    }

    /**
     * Aligns {@code seqForCluster} against the amp+primer reference and returns
     * the name of the first reported subject that carries one of the primer
     * labels.
     *
     * <p>An output line is accepted when it contains {@link #LABEL0},
     * {@link #LABEL1} or {@link #LABEL2} (not at column 0), has at least 12
     * whitespace-separated fields and its fifth field is longer than two
     * characters. The subject name is field 10 when field 9 is {@code "C"},
     * otherwise field 9.
     *
     * @return the matching reference name, or {@code null} if no line qualifies
     */
    private static String matchForClusterGroup(String seqForCluster, File ampPlusPrimerFastaFile) throws Exception {
        File tmpFasta = File.createTempFile("seq", "fa");
        File tempOutputFile = null;
        try {
            FileManager.writeTextFile(tmpFasta, ">seq" + "\n" + seqForCluster, true);
            tempOutputFile = alignFlankings(ampPlusPrimerFastaFile, tmpFasta);
            for (String line : FileManager.readTextFile(tempOutputFile).split("\n")) {
                if (!hasPrimerLabel(line)) {
                    continue;
                }
                String[] items = line.trim().split("\\s+");
                if (items.length < 12 || items[4].length() <= 2) {
                    continue;
                }
                boolean isReverseComp = items[8].trim().equals("C");
                String targetName = isReverseComp ? items[9].trim() : items[8].trim();
                System.out.println(targetName + " " + line);
                return targetName;
            }
            return null;
        } finally {
            // This method runs once per read; remove the scratch files so they do not pile up.
            tmpFasta.delete();
            if (tempOutputFile != null) {
                tempOutputFile.delete();
            }
        }
    }

    /** Tells whether the line contains one of the primer labels after its first character. */
    private static boolean hasPrimerLabel(String line) {
        return line.indexOf(LABEL0) > 0 || line.indexOf(LABEL1) > 0 || line.indexOf(LABEL2) > 0;
    }

    /**
     * Runs {@code cross_match} (through {@code bash -c}) with {@code tmpFasta} as
     * first and {@code ampPlusPrimerFastaFile} as second input and redirects its
     * standard output into a new temporary file.
     *
     * @return the file holding the cross_match output; the caller owns it
     */
    private static File alignFlankings(File ampPlusPrimerFastaFile, File tmpFasta) throws IOException {
        File tempOutputFile = File.createTempFile("out", "txt");
        String cmd = "/genome/bin/cross_match -minmatch 15 -minscore 50 " + tmpFasta.getAbsolutePath() + " " + ampPlusPrimerFastaFile.getAbsolutePath() + " > " + tempOutputFile.getAbsolutePath();
        System.out.println("cmd= " + cmd);
        String[] commandArray = new String[]{"bash", "-c", cmd};
        SystemCommand command = new SystemCommand(commandArray, null, false);
        command.makeItSo();
        return tempOutputFile;
    }

    /**
     * Builds the clustering reference: for every primer pair one FASTA record
     * consisting of the forward primer, the ampicillin sequence and the reverse
     * complement of the reverse primer.
     *
     * @param ampFasta FASTA file whose first record is the ampicillin sequence
     * @param primerFasta FASTA file with the primers, see {@link #getPrimerSeq}
     * @return temporary FASTA file, deleted when the JVM exits
     * @throws IllegalArgumentException if {@code ampFasta} holds no sequence
     */
    private static File contructAmpPlusPrimerFastaFile(File ampFasta, File primerFasta) throws Exception {
        String ampSeq = readFirstFastaSequence(ampFasta);
        Map<String, PrimerPair> primerPairMap = getPrimerSeq(primerFasta);
        StringBuilder builder = new StringBuilder();
        for (Map.Entry<String, PrimerPair> entry : primerPairMap.entrySet()) {
            PrimerPair pair = entry.getValue();
            builder.append(">").append(entry.getKey());
            builder.append("\n");
            builder.append(pair.getForwardSeq());
            builder.append(ampSeq);
            builder.append(revCompSeq(pair.getReverseSeq()));
            builder.append("\n");
        }
        System.out.println(builder.toString());
        File tmpFasta = File.createTempFile("amp", "primer");
        // Needed for the whole run, so clean up at JVM exit instead of right away.
        tmpFasta.deleteOnExit();
        FileManager.writeTextFile(tmpFasta, builder.toString(), true);
        return tmpFasta;
    }

    /**
     * Returns the sequence of the first record of a FASTA file, joining all of
     * its lines so that line-wrapped files are read completely.
     */
    private static String readFirstFastaSequence(File fasta) {
        StringBuilder sequence = new StringBuilder();
        boolean inFirstRecord = false;
        for (String rawLine : FileManager.readTextFile(fasta).split("\n")) {
            String line = rawLine.trim();
            if (line.startsWith(">")) {
                if (inFirstRecord) {
                    break;
                }
                inFirstRecord = true;
            } else if (inFirstRecord) {
                sequence.append(line);
            }
        }
        if (sequence.length() == 0) {
            throw new IllegalArgumentException("No FASTA sequence found in " + fasta);
        }
        return sequence.toString();
    }

    /**
     * Returns the reverse complement of a nucleotide sequence in upper case.
     *
     * <p>All IUPAC nucleotide codes are complemented, {@code '.'} and {@code '-'}
     * are kept, and any other character becomes {@code 'N'} so that the result
     * always has the same length as the input.
     *
     * @param seq sequence to reverse-complement; must not be {@code null}
     * @return the reverse complement
     */
    public static String revCompSeq(String seq) {
        StringBuilder ret = new StringBuilder(seq.length());
        for (int i = seq.length() - 1; i >= 0; --i) {
            ret.append(complement(seq.charAt(i)));
        }
        return ret.toString();
    }

    /** Complements a single IUPAC nucleotide code; unknown characters map to 'N'. */
    private static char complement(char base) {
        switch (Character.toUpperCase(base)) {
            case 'A': return 'T';
            case 'T':
            case 'U': return 'A';
            case 'C': return 'G';
            case 'G': return 'C';
            case 'R': return 'Y';
            case 'Y': return 'R';
            case 'K': return 'M';
            case 'M': return 'K';
            case 'B': return 'V';
            case 'V': return 'B';
            case 'D': return 'H';
            case 'H': return 'D';
            case 'S': return 'S';
            case 'W': return 'W';
            case '.': return '.';
            case '-': return '-';
            default: return 'N';
        }
    }

    /**
     * Reads primer pairs from a FASTA file with one sequence line per record.
     *
     * <p>The pair id is the part of the name before the first {@code '-'} for
     * names starting with {@code "KIR"}, otherwise the name with {@code "-up"} and
     * {@code "-down"} removed. Names ending in {@code "up"} fill the forward
     * primer, names ending in {@code "down"} the reverse primer. Headers that are
     * not followed by a sequence line are skipped with a warning.
     *
     * @return pair id to primer pair, in file order
     */
    private static Map<String, PrimerPair> getPrimerSeq(File primerFasta) {
        LinkedHashMap<String, PrimerPair> primerPairMap = new LinkedHashMap<String, PrimerPair>();
        String[] lines = FileManager.readTextFile(primerFasta).split("\n");
        for (int i = 0; i < lines.length; ++i) {
            if (!lines[i].startsWith(">")) {
                continue;
            }
            String seqName = lines[i].trim().substring(1);
            if (i + 1 >= lines.length || lines[i + 1].startsWith(">")) {
                System.err.println("primer " + seqName + " has no sequence line, skipped");
                continue;
            }
            String primerSeq = lines[i + 1].trim();
            String id = seqName.startsWith("KIR") ? seqName.split("-")[0] : seqName.replace("-up", "").replace("-down", "");
            PrimerPair pair = primerPairMap.get(id);
            if (pair == null) {
                pair = new PrimerPair();
                pair.setId(id);
                primerPairMap.put(id, pair);
            }
            if (seqName.endsWith("up")) {
                pair.setForwardName(seqName);
                pair.setForwardSeq(primerSeq);
            }
            if (seqName.endsWith("down")) {
                pair.setReverseName(seqName);
                pair.setReverseSeq(primerSeq);
            }
        }
        return primerPairMap;
    }
}

