/*
 * The MIT License
 *
 * Copyright (c) 2013 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
package net.sf.picard.illumina;

import net.sf.picard.PicardException;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.fastq.*;
import net.sf.picard.illumina.parser.ClusterData;
import net.sf.picard.illumina.parser.ReadData;
import net.sf.picard.illumina.parser.ReadStructure;
import net.sf.picard.io.IoUtil;
import net.sf.picard.util.CollectionUtil;
import net.sf.picard.util.IlluminaUtil;
import net.sf.picard.util.Log;
import net.sf.picard.util.TabbedTextFileWithHeaderParser;
import net.sf.samtools.*;
import net.sf.samtools.util.SortingCollection;
import net.sf.samtools.util.StringUtil;

import java.io.*;
import java.util.*;

public class IlluminaBasecallsToFastq extends CommandLineProgram {
    // The following attributes define the command-line arguments
    @Usage
    public String USAGE =
            getStandardUsagePreamble() + "Generate fastq file(s) from data in an Illumina basecalls output directory.\n" +
            "Separate fastq file(s) are created for each template read, and for each barcode read, in the basecalls.\n" +
            "Template fastqs have extensions like .<number>.fastq, where <number> is the number of the template read,\n" +
            "starting with 1.  Barcode fastqs have extensions like .barcode_<number>.fastq, where <number> is the number\n" +
            "of the barcode read, starting with 1.";

    @Option(doc = "The basecalls directory. ", shortName = "B")
    public File BASECALLS_DIR;

    @Option(doc = "Lane number. ", shortName = StandardOptionDefinitions.LANE_SHORT_NAME)
    public Integer LANE;

    @Option(doc = "The prefix for output fastqs.  Extensions as described above are appended.  Use this option for " +
            "a non-barcoded run, or for a barcoded run in which it is not desired to demultiplex reads into separate " +
            "files by barcode.",
            shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
            mutex = {"MULTIPLEX_PARAMS"})
    public File OUTPUT_PREFIX;

    @Option(doc = "The barcode of the run.  Prefixed to read names.")
    public String RUN_BARCODE;

    @Option(doc = ReadStructure.PARAMETER_DOC, shortName = "RS")
    public String READ_STRUCTURE;

    @Option(doc = "Tab-separated file for creating all output fastqs demultiplexed by barcode for a lane with single " +
            "IlluminaBasecallsToFastq invocation.  The columns are OUTPUT_PREFIX, and BARCODE_1, BARCODE_2 ... BARCODE_X " +
            "where X = number of barcodes per cluster (optional).  Row with BARCODE_1 set to 'N' is used to specify " +
            "an output_prefix for no barcode match.",
            mutex = {"OUTPUT_PREFIX"})
    public File MULTIPLEX_PARAMS;

    @Option(doc = "Which adapters to look for in the read.")
    public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = new ArrayList<IlluminaUtil.IlluminaAdapterPair>(
            Arrays.asList(IlluminaUtil.IlluminaAdapterPair.INDEXED,
                    IlluminaUtil.IlluminaAdapterPair.DUAL_INDEXED,
                    IlluminaUtil.IlluminaAdapterPair.NEXTERA_V2,
                    IlluminaUtil.IlluminaAdapterPair.FLUIDIGM));

    @Option(doc = "The number of threads to run in parallel. If NUM_PROCESSORS = 0, number of cores is automatically set to " +
            "the number of cores available on the machine. If NUM_PROCESSORS < 0, then the number of cores used will" +
            " be the number available on the machine less NUM_PROCESSORS.")
    public Integer NUM_PROCESSORS = 0;

    @Option(doc = "If set, this is the first tile to be processed (used for debugging).  Note that tiles are not processed" +
            " in numerical order.",
            optional = true)
    public Integer FIRST_TILE;

    @Option(doc = "If set, process no more than this many tiles (used for debugging).", optional = true)
    public Integer TILE_LIMIT;

    @Option(doc="Apply EAMSS filtering to identify inappropriately quality scored bases towards the ends of reads" +
            " and convert their quality scores to Q2.")
    public boolean APPLY_EAMSS_FILTER = true;

    @Option(doc = "If true, call System.gc() periodically.  This is useful in cases in which the -Xmx value passed " +
            "is larger than the available memory.")
    public Boolean FORCE_GC = true;

    @Option(doc = "Configure SortingCollections to store this many records before spilling to disk. For an indexed" +
            " run, each SortingCollection gets this value/number of indices.")
    public int MAX_READS_IN_RAM_PER_TILE = 1200000;

    private final Map<String, FastqRecordsWriter> barcodeFastqWriterMap = new HashMap<String, FastqRecordsWriter>();
    private ReadStructure readStructure;
    IlluminaBasecallsConverter<FastqRecordsForCluster> basecallsConverter;
    private static final Log log = Log.getInstance(IlluminaBasecallsToFastq.class);
    private final FastqWriterFactory fastqWriterFactory = new FastqWriterFactory();

    private static final Comparator<FastqRecordsForCluster> queryNameComparator = new Comparator<FastqRecordsForCluster>() {
        @Override
        public int compare(final FastqRecordsForCluster r1, final FastqRecordsForCluster r2) {
            return SAMRecordQueryNameComparator.compareReadNames(r1.templateRecords[0].getReadHeader(),
                    r2.templateRecords[0].getReadHeader());
        }
    };


    @Override
    protected int doWork() {
        initialize();

        basecallsConverter.doTileProcessing();
        return 0;
    }

    /**
     * Prepares loggers, initiates garbage collection thread, parses arguments and initialized variables appropriately/
     */
    private void initialize() {
        readStructure = new ReadStructure(READ_STRUCTURE);
        if (MULTIPLEX_PARAMS != null) {
            IoUtil.assertFileIsReadable(MULTIPLEX_PARAMS);
        }
        final boolean demultiplex;
        if (OUTPUT_PREFIX != null) {
            barcodeFastqWriterMap.put(null, buildWriter(OUTPUT_PREFIX));
            demultiplex = false;
        } else {
            populateWritersFromMultiplexParams();
            demultiplex = true;
        }
        int readsPerCluster = readStructure.templates.length() + readStructure.barcodes.length();
        basecallsConverter = new IlluminaBasecallsConverter<FastqRecordsForCluster>(BASECALLS_DIR, LANE, readStructure,
                barcodeFastqWriterMap, demultiplex, MAX_READS_IN_RAM_PER_TILE/readsPerCluster, TMP_DIR, NUM_PROCESSORS,
                FORCE_GC, FIRST_TILE, TILE_LIMIT, queryNameComparator,
                new FastqRecordsForClusterCodec(readStructure.templates.length(),
                readStructure.barcodes.length()), FastqRecordsForCluster.class,
                this.APPLY_EAMSS_FILTER);

        log.info("READ STRUCTURE IS " + readStructure.toString());

        basecallsConverter.setConverter(new ClusterToFastqRecordsForClusterConverter());

    }

    /**
     * Assert that expectedCols are present
     *
     * @param actualCols   The columns present in the MULTIPLEX_PARAMS file
     * @param expectedCols The columns that are REQUIRED
     */
    private void assertExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
        final Set<String> missingColumns = new HashSet<String>(expectedCols);
        missingColumns.removeAll(actualCols);

        if (missingColumns.size() > 0) {
            throw new PicardException(String.format(
                    "MULTIPLEX_PARAMS file %s is missing the following columns: %s.",
                    MULTIPLEX_PARAMS.getAbsolutePath(), StringUtil.join(", ", missingColumns
            )));
        }
    }

    /**
     * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the barcodeFastqWriterMap map,
     * where the key to the map is the concatenation of all barcodes in order for the given line.
     */
    private void populateWritersFromMultiplexParams() {
        final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS);

        final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX");
        final List<String> barcodeColumnLabels = new ArrayList<String>();
        for (int i = 1; i <= readStructure.barcodes.length(); i++) {
            barcodeColumnLabels.add("BARCODE_" + i);
        }

        expectedColumnLabels.addAll(barcodeColumnLabels);
        assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);

        for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
            List<String> barcodeValues = null;

            if (barcodeColumnLabels.size() > 0) {
                barcodeValues = new ArrayList<String>();
                for (final String barcodeLabel : barcodeColumnLabels) {
                    barcodeValues.add(row.getField(barcodeLabel));
                }
            }

            final String key = (barcodeValues == null || barcodeValues.contains("N")) ? null : StringUtil.join("", barcodeValues);
            if (barcodeFastqWriterMap.containsKey(key)) {    //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file
                throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " +
                        MULTIPLEX_PARAMS);
            }


            final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX")));
            barcodeFastqWriterMap.put(key, writer);
        }
        if (barcodeFastqWriterMap.isEmpty()) {
            throw new PicardException("MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does have any data rows.");
        }
    }

    /**
     * @return FastqRecordsWriter that contains one or more FastqWriters (amount depends on read structure), all using
     * outputPrefix to determine the filename(s).
     */
    private FastqRecordsWriter buildWriter(final File outputPrefix) {
        final File outputDir = outputPrefix.getAbsoluteFile().getParentFile();
        IoUtil.assertDirectoryIsWritable(outputDir);
        final String prefixString = outputPrefix.getName();
        final FastqWriter[] templateWriters = new FastqWriter[readStructure.templates.length()];
        final FastqWriter[] barcodeWriters = new FastqWriter[readStructure.barcodes.length()];
        for (int i = 0; i < templateWriters.length; ++i) {
            final String filename = String.format("%s.%d.fastq", prefixString, i+1);
            templateWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
        }
        for (int i = 0; i < barcodeWriters.length; ++i) {
            final String filename = String.format("%s.barcode_%d.fastq", prefixString, i+1);
            barcodeWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
        }
        return new FastqRecordsWriter(templateWriters, barcodeWriters);
    }

    public static void main(final String[] args) {
        new IlluminaBasecallsToFastq().instanceMainWithExit(args);
    }

    /**
     * Container for various FastqWriters, one for each template read and one for each barcode read.
     */
    private static class FastqRecordsWriter implements IlluminaBasecallsConverter.ConvertedClusterDataWriter<FastqRecordsForCluster> {
        final FastqWriter[] templateWriters;
        final FastqWriter[] barcodeWriters;

        /**
         * @param templateWriters Writers for template reads in order, e,g. 0th element is for template read 1.
         * @param barcodeWriters Writers for barcode reads in order, e,g. 0th element is for barcode read 1.
         */
        private FastqRecordsWriter(final FastqWriter[] templateWriters, final FastqWriter[] barcodeWriters) {
            this.templateWriters = templateWriters;
            this.barcodeWriters = barcodeWriters;
        }

        @Override
        public void write(final FastqRecordsForCluster records) {
            write(templateWriters, records.templateRecords);
            write(barcodeWriters, records.barcodeRecords);
        }

        private void write(final FastqWriter[] writers, final FastqRecord[] records) {
            for (int i = 0; i < writers.length; ++i) {
                writers[i].write(records[i]);
            }
        }

        @Override
        public void close() {
            for (final FastqWriter writer : templateWriters) {
                writer.close();
            }
            for (final FastqWriter writer : barcodeWriters) {
                writer.close();
            }
        }
    }

    /**
     * Contains the results of transforming one cluster into the record(s) to be written to output file(s).
     */
    static class FastqRecordsForCluster {
        // These are accessed directly by converter and writer rather than through getters and setters.
        final FastqRecord[] templateRecords;
        final FastqRecord[] barcodeRecords;

        FastqRecordsForCluster(final int numTemplates, final int numBarcodes) {
            templateRecords = new FastqRecord[numTemplates];
            barcodeRecords = new FastqRecord[numBarcodes];
        }
    }

    /**
     * Passed to IlluminaBaseCallsConverter to do the conversion from input format to output format.
     */
    class ClusterToFastqRecordsForClusterConverter
            implements IlluminaBasecallsConverter.ClusterDataConverter<FastqRecordsForCluster> {

        private final int [] templateIndices;
        private final int [] barcodeIndices;

        ClusterToFastqRecordsForClusterConverter() {
            this.templateIndices = readStructure.templates.getIndices();
            this.barcodeIndices = readStructure.barcodes.getIndices();
        }

        @Override
        public FastqRecordsForCluster convertClusterToOutputRecord(final ClusterData cluster) {
            final FastqRecordsForCluster ret =
                    new FastqRecordsForCluster(readStructure.templates.length(), readStructure.barcodes.length());
            final String readName = IlluminaUtil.makeReadName(RUN_BARCODE, cluster.getLane(), cluster.getTile(),
                    cluster.getX(), cluster.getY());
            final boolean appendReadNumberSuffix = ret.templateRecords.length > 1;
            makeFastqRecords(ret.templateRecords, templateIndices, readName, cluster, appendReadNumberSuffix);
            makeFastqRecords(ret.barcodeRecords, barcodeIndices, readName, cluster, false);
            return ret;
        }

        private void makeFastqRecords(final FastqRecord[] recs, final int[] indices, final String readName,
                                      final ClusterData cluster, final boolean appendReadNumberSuffix) {
            for (int i = 0; i < indices.length; ++i) {
                final ReadData readData = cluster.getRead(indices[i]);
                final String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
                final String readNameForEnd;
                if (appendReadNumberSuffix) readNameForEnd = readName + "/" + (i+1);
                else readNameForEnd = readName;
                recs[i] = new FastqRecord(readNameForEnd, readBases, null,
                        SAMUtils.phredToFastq(readData.getQualities()));
            }
        }
    }

    /**
     * Coded passed to IlluminaBasecallsConverter for use in SortingCollections of output records.
     */
    static class FastqRecordsForClusterCodec implements SortingCollection.Codec<FastqRecordsForCluster> {
        private final int numTemplates;
        private final int numBarcodes;
        private BasicFastqWriter writer = null;
        private FastqReader reader = null;

        FastqRecordsForClusterCodec(final int numTemplates, final int numBarcodes) {
            this.numTemplates = numTemplates;
            this.numBarcodes = numBarcodes;
        }

        @Override
        public void setOutputStream(final OutputStream os) {
            writer = new BasicFastqWriter(new PrintStream(os));
        }

        @Override
        public void setInputStream(final InputStream is) {
            reader = new FastqReader(new BufferedReader(new InputStreamReader(is)));
        }

        @Override
        public void encode(final FastqRecordsForCluster val) {
            if (numTemplates != val.templateRecords.length) throw new IllegalStateException();
            if (numBarcodes != val.barcodeRecords.length) throw new IllegalStateException();
            encodeArray(val.templateRecords);
            encodeArray(val.barcodeRecords);
            writer.flush();
        }

        private void encodeArray(final FastqRecord[] recs) {
            for (final FastqRecord rec: recs) {
                writer.write(rec);
            }
        }

        @Override
        public FastqRecordsForCluster decode() {
            if (!reader.hasNext()) return null;
            final FastqRecordsForCluster ret = new FastqRecordsForCluster(numTemplates, numBarcodes);
            decodeArray(ret.templateRecords);
            decodeArray(ret.barcodeRecords);
            return ret;
        }

        private void decodeArray(final FastqRecord[] recs) {
            for (int i = 0; i < recs.length; ++i) {
                recs[i] = reader.next();
            }
        }

        @Override
        public SortingCollection.Codec<FastqRecordsForCluster> clone() {
            return new FastqRecordsForClusterCodec(numTemplates, numBarcodes);
        }
    }
}
