package de.unibi.cebitec.emgb.datawarehouse.export.parser;

import com.datastax.driver.core.QueryLogger;
import com.sleepycat.persist.impl.Store;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.facades.CBlastNrFacade;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.facades.CKeggDataFacade;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.facades.CPfamGoFacade;
import de.unibi.cebitec.emgb.datawarehouse.export.Binning;
import de.unibi.cebitec.emgb.datawarehouse.export.Count;
import de.unibi.cebitec.emgb.datawarehouse.export.Coverage;
import de.unibi.cebitec.emgb.datawarehouse.export.DataObject;
import de.unibi.cebitec.emgb.datawarehouse.export.FoldChange;
import de.unibi.cebitec.emgb.datawarehouse.export.config.FileProperties;
import de.unibi.cebitec.emgb.datawarehouse.util.ThreadPool;
import de.unibi.cebitec.emgb.datawarehouse.util.Time;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/unibi/cebitec/emgb/datawarehouse/export/parser/CMGParser.class */
public class CMGParser {
    private static final Logger LOG = LoggerFactory.getLogger((Class<?>) CMGParser.class);
    /** Supplies the data set name and the paths of all input files. */
    private final FileProperties props;
    /** Directory the output files are created in. */
    private final String targetDir;
    /** Requested number of output files. */
    private final int files;
    /** Gene ID (first token of the FASTA header, without the leading '>') to its record; filled by parseNucl. */
    private final HashMap<String, DataObject> geneIdHashMap = new HashMap<>();
    /** Value of the "ID=" attribute of the FASTA header to the same record; used by parseGff for lookups. */
    private final HashMap<String, DataObject> idtoDataObjectHashMap = new HashMap<>();
    /** First column of the coverage files to the coverage entries read for it; looked up by contig ID in parseGff. */
    private final HashMap<String, List<Coverage>> coverageIdObjectHashMap = new HashMap<>();
    // NOTE(review): never read or written in this class as far as visible here - confirm whether it is still needed.
    private final HashMap<String, String> keggIdToKoMap = new HashMap<>();
    /** First column of the binning file to the binnings built from the remaining columns; looked up by contig ID in parseGff. */
    private final HashMap<String, ArrayList<Binning>> contigToBin = new HashMap<>();
    // Facades handed to the worker tasks of the BLAST, Pfam and KEGG parse steps.
    private final CBlastNrFacade blastEntityFacade = new CBlastNrFacade();
    private final CPfamGoFacade pfamEntityFacade = new CPfamGoFacade();
    private final CKeggDataFacade keggEntityFacade = new CKeggDataFacade();

    /**
     * Creates a parser for one data set.
     *
     * @param fileProperties source of the data set name and of every input file path
     * @param str directory the output files are written to
     * @param i number of output files the result is distributed over
     */
    public CMGParser(FileProperties fileProperties, String str, int i) {
        this.files = i;
        this.targetDir = str;
        this.props = fileProperties;
    }

    /**
     * Runs every parse step and then writes the collected records.
     *
     * <p>Parse order: binning (only if a path is configured), nucleotide FASTA, count files,
     * coverage files, fold change files, protein FASTA, LCA, KEGG (only if a path is
     * configured), BLASTP, GFF and Pfam. Afterwards the records are handed to
     * {@code WriterWorker} tasks: one file {@code <dataSet>.jsonlist.gz}, or several files
     * {@code <dataSet>.part_<n>.jsonlist.gz} when more than one output file was requested.
     * The method returns after the thread pool reports completion.
     *
     * @throws Exception if any parse step fails
     */
    public void run() throws Exception {
        final long startMillis = System.currentTimeMillis();

        if (!this.props.getBinningPath().isEmpty()) {
            parseBinning(this.props.getBinningPath());
        }
        parseNucl(this.props.getFnaPath());
        for (Map.Entry<String, String> countFile : this.props.getCountFiles().entrySet()) {
            parseCountData(countFile);
        }
        for (Map.Entry<String, String> coverageFile : this.props.getCoverageFiles().entrySet()) {
            parseCoverageData(coverageFile);
        }
        for (Map.Entry<String, String> foldChangeFile : this.props.getDnafoldChangeFiles().entrySet()) {
            parseFoldChange(foldChangeFile);
        }
        parseFaa(this.props.getFaaPath());
        parsePhylodist(this.props.getLcaPath());
        if (!this.props.getKeggPath().isEmpty()) {
            parseKeggData(this.props.getKeggPath(), this.keggEntityFacade);
        }
        parseBlastp(this.props.getBlastPPath(), this.blastEntityFacade);
        parseGff(this.props.getGffPath());
        parsePfamResult(this.props.getPfamPath(), this.pfamEntityFacade);

        LOG.info("All parse steps are done in {}.", Time.ms2humantime(System.currentTimeMillis() - startMillis));
        final int total = this.geneIdHashMap.size();
        LOG.info("{} entries have been found", total);

        if (this.files <= 1) {
            if (this.files < 1) {
                // A non-positive file count used to fail with a division by zero (or produce
                // arbitrary chunk sizes) only after all parsing was finished; write one file instead.
                LOG.warn("Invalid number of output files ({}), writing a single file instead.", this.files);
            }
            LOG.info("Create output file ...");
            ThreadPool.getInstance().execute(new WriterWorker(
                    new File(this.targetDir, this.props.getDataSet() + ".jsonlist.gz"), this.geneIdHashMap.values()));
        } else {
            LOG.info("Create {} ouput files ...", this.files);
            // Ceiling division, so that at most 'files' chunks are produced.
            final long chunkSize = (total / this.files) + (total % this.files == 0 ? 0 : 1);
            LOG.info("Each file contains roughly {} entries.", chunkSize);
            int part = 1;
            long seen = 0;
            HashSet<DataObject> chunk = new HashSet<>();
            for (DataObject gene : this.geneIdHashMap.values()) {
                seen++;
                chunk.add(gene);
                if (seen % chunkSize == 0) {
                    ThreadPool.getInstance().execute(new WriterWorker(
                            new File(this.targetDir, this.props.getDataSet() + ".part_" + part + ".jsonlist.gz"), chunk));
                    part++;
                    chunk = new HashSet<>();
                }
            }
            // The last chunk is usually smaller than chunkSize; it must be written as well,
            // otherwise its records are silently lost.
            if (!chunk.isEmpty()) {
                ThreadPool.getInstance().execute(new WriterWorker(
                        new File(this.targetDir, this.props.getDataSet() + ".part_" + part + ".jsonlist.gz"), chunk));
            }
        }
        ThreadPool.getInstance().waitFor();
        LOG.info("All done in {}.", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads one tab-separated count file and stores a {@code Count} on every known gene.
     *
     * <p>Column 0 is used as the gene ID and column 1 is parsed as an integer count. Lines
     * starting with {@code Store.NAME_SEPARATOR} are skipped (NOTE(review): presumably the
     * "#" comment marker, borrowed from an unrelated library - confirm). Unknown gene IDs are
     * logged and skipped.
     *
     * @param entry key: name the count is stored under; value: path of the count file
     * @throws Exception if the file cannot be read or a count is not a valid integer
     */
    public void parseCountData(Map.Entry<String, String> entry) throws Exception {
        final String sample = entry.getKey();
        final String path = entry.getValue();
        LOG.info("[CountData] Parsing {}", path);
        // try-with-resources: the reader is closed even if a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(Store.NAME_SEPARATOR)) {
                    continue;
                }
                String[] columns = line.split("\t");
                String geneId = columns[0];
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    LOG.error("Gene ID could not be found during Count data parsing step : {}", geneId);
                    continue;
                }
                gene.counts.put(sample, new Count(sample, Integer.parseInt(columns[1])));
            }
        }
    }

    /**
     * Reads one tab-separated coverage file and collects a {@code Coverage} per line, grouped
     * by the value of column 0.
     *
     * <p>Columns 1 and 2 are parsed as integers and column 3 as a double. Lines starting with
     * {@code Store.NAME_SEPARATOR} are skipped (NOTE(review): presumably the "#" comment
     * marker - confirm). The collected entries are attached to the genes later, in parseGff.
     *
     * @param entry key: name passed to every {@code Coverage}; value: path of the coverage file
     * @throws Exception if the file cannot be read or a number cannot be parsed
     */
    public void parseCoverageData(Map.Entry<String, String> entry) throws Exception {
        final String sample = entry.getKey();
        final String path = entry.getValue();
        LOG.info("[CoverageData] Parsing {}", path);
        final long startMillis = System.currentTimeMillis();
        // try-with-resources: the reader is closed even if a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(Store.NAME_SEPARATOR)) {
                    continue;
                }
                String[] columns = line.split("\t");
                Coverage coverage = new Coverage(sample, Integer.parseInt(columns[1]),
                        Integer.parseInt(columns[2]), Double.parseDouble(columns[3]));
                List<Coverage> perId = this.coverageIdObjectHashMap.get(columns[0]);
                if (perId == null) {
                    perId = new ArrayList<>();
                    this.coverageIdObjectHashMap.put(columns[0], perId);
                }
                perId.add(coverage);
            }
        }
        LOG.info("CoverageData parsing done in {} .", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the Pfam result file and hands its lines, in batches of 5000, to
     * {@code PfamResultWorker} tasks on the shared thread pool.
     *
     * <p>Lines starting with {@code Store.NAME_SEPARATOR} are skipped and not counted
     * (NOTE(review): presumably the "#" comment marker - confirm). Progress dots are printed
     * to standard output. The method returns after the thread pool reports completion.
     *
     * @param str path of the Pfam result file
     * @param cPfamGoFacade facade passed on to every worker
     * @throws Exception if the file cannot be read
     */
    public void parsePfamResult(String str, CPfamGoFacade cPfamGoFacade) throws Exception {
        LOG.info("[PfamResult] Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        final ThreadPool threadPool = ThreadPool.getInstance();
        long lineCount = 0;
        // try-with-resources: the reader is closed even if reading fails half way.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            ArrayList<String> batch = new ArrayList<>();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(Store.NAME_SEPARATOR)) {
                    continue;
                }
                lineCount++;
                batch.add(line);
                if (batch.size() == 5000) {
                    threadPool.execute(new PfamResultWorker(batch, cPfamGoFacade, this.geneIdHashMap));
                    // The worker owns the submitted list; start a fresh one.
                    batch = new ArrayList<>();
                }
                if (lineCount % QueryLogger.DEFAULT_SLOW_QUERY_THRESHOLD_MS == 0) {
                    System.out.print(".");
                    if (lineCount % 200000 == 0) {
                        System.out.println(" # " + lineCount);
                    }
                }
            }
            if (!batch.isEmpty()) {
                threadPool.execute(new PfamResultWorker(batch, cPfamGoFacade, this.geneIdHashMap));
            }
        }
        threadPool.waitFor();
        System.out.println(" # " + lineCount);
        LOG.info("PfamResult parsing done ({} lines) in {}. ", lineCount,
                Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the nucleotide FASTA file and creates one {@code DataObject} per record.
     *
     * <p>Every line containing '>' is treated as a header: the gene ID is the first
     * space-separated token without its first character, and the lookup ID is the text between
     * "ID=" and the following ';'. All other lines are appended to the current sequence.
     * Each record is registered under both IDs. As before, a header that is directly followed
     * by another header (no sequence lines) is replaced by the later one; only the last header
     * of the file is registered even without sequence lines.
     *
     * <p>Nothing is registered for a file without any header; previously such a file produced
     * a record with an empty gene ID.
     *
     * @param str path of the nucleotide FASTA file
     * @throws Exception if the file cannot be read or a header does not have the expected form
     */
    public void parseNucl(String str) throws Exception {
        LOG.info("[Nucl] Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        int stored = 0;
        // try-with-resources: the reader is closed even if a header cannot be parsed.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String geneId = "";
            String lookupId = "";
            boolean headerSeen = false;
            // StringBuilder avoids re-copying the whole sequence for every appended line.
            StringBuilder sequence = new StringBuilder();
            long progress = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains(">")) {
                    sequence.append(line);
                    continue;
                }
                if (headerSeen && sequence.length() > 0) {
                    storeNucleotideRecord(geneId, lookupId, sequence.toString());
                    stored++;
                    progress++;
                    if (progress % QueryLogger.DEFAULT_SLOW_QUERY_THRESHOLD_MS == 0) {
                        System.out.print(".");
                        if (progress % 200000 == 0) {
                            System.out.println(" # " + progress);
                        }
                    }
                }
                // Sequence text seen before the very first header belongs to no record and is dropped.
                sequence.setLength(0);
                geneId = line.split(" ")[0].substring(1);
                int idStart = line.indexOf("ID=");
                lookupId = line.substring(idStart + 3, line.indexOf(";", idStart + 1));
                headerSeen = true;
            }
            System.out.println("");
            if (headerSeen) {
                // The last record has no following header that would trigger its registration.
                storeNucleotideRecord(geneId, lookupId, sequence.toString());
                stored++;
            }
        }
        LOG.info("{} nucleotide sequences found.", stored);
        LOG.info("FNA parsing done in {}. ", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /** Creates the record for one FASTA entry and registers it under its gene ID and its lookup ID. */
    private void storeNucleotideRecord(String geneId, String lookupId, String sequence) {
        DataObject record = new DataObject();
        record.dataSet = this.props.getDataSet();
        record.nucleotideSequence = sequence;
        record.geneId = geneId;
        this.idtoDataObjectHashMap.put(lookupId, record);
        this.geneIdHashMap.put(geneId, record);
    }

    /**
     * Reads the protein FASTA file and stores each sequence on the gene with the same ID.
     *
     * <p>Every line containing '>' is treated as a header whose gene ID is the first
     * space-separated token without its first character; all other lines are appended to the
     * current sequence. Gene IDs that are not known produce a warning. As before, a header
     * that is directly followed by another header is skipped without a warning. A file without
     * any content no longer produces a warning for an empty gene ID.
     *
     * @param str path of the protein FASTA file
     * @throws Exception if the file cannot be read
     */
    public void parseFaa(String str) throws Exception {
        LOG.info("[Faa] Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        // try-with-resources: the reader is closed even if reading fails half way.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String geneId = "";
            boolean headerSeen = false;
            // StringBuilder avoids re-copying the whole sequence for every appended line.
            StringBuilder sequence = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains(">")) {
                    sequence.append(line);
                    continue;
                }
                if (sequence.length() > 0) {
                    attachProteinSequence(geneId, sequence.toString());
                    sequence.setLength(0);
                }
                geneId = line.split(" ")[0].substring(1);
                headerSeen = true;
            }
            // The last record has no following header that would trigger its assignment.
            if (headerSeen || sequence.length() > 0) {
                attachProteinSequence(geneId, sequence.toString());
            }
        }
        LOG.info("FAA parsing done in {}. ", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /** Sets the protein sequence of the gene with the given ID, or logs a warning if the ID is unknown. */
    private void attachProteinSequence(String geneId, String sequence) {
        DataObject gene = this.geneIdHashMap.get(geneId);
        if (gene != null) {
            gene.proteinSequence = sequence;
        } else {
            LOG.warn("{} does not exist during FAA step.", geneId);
        }
    }

    /**
     * Reads one comma-separated fold change file and stores a {@code FoldChange} on every
     * known gene.
     *
     * <p>The first line is skipped. Column 1, with all double quotes removed, is the gene ID;
     * columns 8, 2, 3, 9, 7 and 13 are parsed as floats and passed to the {@code FoldChange}
     * constructor in that order. Unknown gene IDs are logged and skipped, like in the other
     * parse steps (previously they caused a {@code NullPointerException}).
     *
     * @param entry key: name the fold change is stored under; value: path of the file
     * @throws Exception if the file cannot be read or a value cannot be parsed
     */
    public void parseFoldChange(Map.Entry<String, String> entry) throws Exception {
        final String path = entry.getValue();
        LOG.info("[FoldChange] Parsing {}", path);
        final long startMillis = System.currentTimeMillis();
        final String sample = entry.getKey();
        // try-with-resources: the reader used to be left open, even after a successful run.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            reader.readLine(); // first line is not data
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(",");
                String geneId = columns[1].replace("\"", "");
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    LOG.warn("Gene ID could not be found during FoldChange parsing step : {}", geneId);
                    continue;
                }
                gene.foldchanges.put(sample, new FoldChange(sample,
                        Float.parseFloat(columns[8]),
                        Float.parseFloat(columns[2]),
                        Float.parseFloat(columns[3]),
                        Float.parseFloat(columns[9]),
                        Float.parseFloat(columns[7]),
                        Float.parseFloat(columns[13])));
            }
        }
        LOG.info("FoldChange Parsing done in {}. ", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the tab-separated GFF file and copies location data onto the matching genes.
     *
     * <p>Lines that contain "exon" or "repeat_region", or that start with
     * {@code Store.NAME_SEPARATOR}, are skipped (NOTE(review): presumably the "#" comment
     * marker - confirm). The gene is looked up by the text between "ID=" and the following
     * ';' in column 8. For a known gene, column 0 becomes its contig ID, and the binnings and
     * coverage entries collected for that contig are attached. Columns 1 to 7 fill the
     * remaining fields; a score that is not a number is stored as 0.
     *
     * @param str path of the GFF file
     * @throws Exception if the file cannot be read or a line does not have the expected form
     */
    public void parseGff(String str) throws Exception {
        LOG.info("[GFF] Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        // try-with-resources: the reader is closed even if a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains("exon") || line.contains("repeat_region")
                        || line.startsWith(Store.NAME_SEPARATOR)) {
                    continue;
                }
                String[] columns = line.split("\t");
                String attributes = columns[8];
                int idStart = attributes.indexOf("ID=");
                String lookupId = attributes.substring(idStart + 3, attributes.indexOf(";", idStart + 1));
                DataObject gene = this.idtoDataObjectHashMap.get(lookupId);
                if (gene == null) {
                    LOG.warn("{} not found", lookupId);
                    continue;
                }
                gene.contigId = columns[0];
                ArrayList<Binning> contigBinnings = this.contigToBin.get(gene.contigId);
                if (contigBinnings != null) {
                    gene.binnings = contigBinnings;
                }
                List<Coverage> contigCoverage = this.coverageIdObjectHashMap.get(gene.contigId);
                if (contigCoverage != null) {
                    for (Coverage coverage : contigCoverage) {
                        gene.coverage.put(coverage.id, coverage);
                    }
                }
                gene.genePredictionTools = columns[1];
                gene.type = columns[2];
                gene.start = Integer.parseInt(columns[3]);
                gene.stop = Integer.parseInt(columns[4]);
                try {
                    gene.score = Float.parseFloat(columns[5]);
                } catch (NumberFormatException ignored) {
                    // The score column may hold a non-numeric value; such scores are stored as 0.
                    gene.score = 0.0f;
                }
                gene.strand = columns[6];
                gene.frame = columns[7];
            }
        }
        LOG.info("GFF Parsing done in {}. ", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the tab-separated LCA file and copies the classification onto the matching genes.
     *
     * <p>Column 0 is the gene ID, columns 2 and 3 are stored as best level and best
     * classification, and columns 5 to 12 (trimmed) are stored as kingdom, phylum, class,
     * order, family, genus, species and strain. Unknown gene IDs are logged and skipped.
     *
     * @param str path of the LCA file
     * @throws Exception if the file cannot be read or a line of a known gene has fewer than 13 columns
     */
    public void parsePhylodist(String str) throws Exception {
        LOG.info("[LCA] Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        // try-with-resources: the reader is closed even if a line fails to parse.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Limit -1 keeps trailing empty columns. Without it a line whose last ranks are
                // empty is split into fewer than 13 parts and the index access below fails.
                String[] columns = line.split("\t", -1);
                DataObject gene = this.geneIdHashMap.get(columns[0]);
                if (gene == null) {
                    LOG.warn("Gene ID could not be found during LCA parsing step : {}", columns[0]);
                    continue;
                }
                gene.bestLevel = columns[2];
                gene.bestClassification = columns[3];
                gene.Kingdom = columns[5].trim();
                gene.Phylum = columns[6].trim();
                gene.Class = columns[7].trim();
                gene.Order = columns[8].trim();
                gene.Family = columns[9].trim();
                gene.Genus = columns[10].trim();
                gene.Species = columns[11].trim();
                gene.Strain = columns[12].trim();
            }
        }
        LOG.info("LCA parsing done in {}. ", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the KEGG file and hands its lines, in batches of 5000, to {@code KeggDataWorker}
     * tasks on the shared thread pool.
     *
     * <p>Every line is counted and forwarded; no lines are skipped. Progress dots are printed
     * to standard output. The method returns after the thread pool reports completion.
     *
     * @param str path of the KEGG file
     * @param cKeggDataFacade facade passed on to every worker
     * @throws Exception if the file cannot be read
     */
    public void parseKeggData(String str, CKeggDataFacade cKeggDataFacade) throws Exception {
        LOG.info("[Kegg]Parsing {}", str);
        final long startMillis = System.currentTimeMillis();
        final ThreadPool threadPool = ThreadPool.getInstance();
        long lineCount = 0;
        // try-with-resources: the reader is closed even if reading fails half way.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            ArrayList<String> batch = new ArrayList<>();
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                batch.add(line);
                if (batch.size() == 5000) {
                    threadPool.execute(new KeggDataWorker(batch, cKeggDataFacade, this.geneIdHashMap));
                    // The worker owns the submitted list; start a fresh one.
                    batch = new ArrayList<>();
                }
                if (lineCount % QueryLogger.DEFAULT_SLOW_QUERY_THRESHOLD_MS == 0) {
                    System.out.print(".");
                    if (lineCount % 200000 == 0) {
                        System.out.println(" # " + lineCount);
                    }
                }
            }
            if (!batch.isEmpty()) {
                threadPool.execute(new KeggDataWorker(batch, cKeggDataFacade, this.geneIdHashMap));
            }
        }
        threadPool.waitFor();
        System.out.println(" # " + lineCount);
        LOG.info("KEGG parsing done ({} lines) in {}. ", lineCount,
                Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the gzip-compressed BLASTP file and hands its lines, in batches of 5000, to
     * {@code BlastPWorker} tasks on the shared thread pool.
     *
     * <p>Every line is counted and forwarded; no lines are skipped. Progress dots are printed
     * to standard output. The method returns after the thread pool reports completion.
     *
     * @param str path of the gzip-compressed BLASTP file
     * @param cBlastNrFacade facade passed on to every worker
     * @throws Exception if the file cannot be read or is not valid gzip data
     */
    public void parseBlastp(String str, CBlastNrFacade cBlastNrFacade) throws Exception {
        LOG.info("[Blastp] Parsing {} ...", str);
        final long startMillis = System.currentTimeMillis();
        final ThreadPool threadPool = ThreadPool.getInstance();
        long lineCount = 0;
        // Each stream is its own resource so that the file handle is released even when the
        // gzip header check in the GZIPInputStream constructor fails.
        try (FileInputStream fileIn = new FileInputStream(str);
                GZIPInputStream gzipIn = new GZIPInputStream(fileIn);
                BufferedReader reader = new BufferedReader(new InputStreamReader(gzipIn))) {
            ArrayList<String> batch = new ArrayList<>();
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                batch.add(line);
                if (batch.size() == 5000) {
                    threadPool.execute(new BlastPWorker(batch, cBlastNrFacade, this.geneIdHashMap));
                    // The worker owns the submitted list; start a fresh one.
                    batch = new ArrayList<>();
                }
                if (lineCount % QueryLogger.DEFAULT_SLOW_QUERY_THRESHOLD_MS == 0) {
                    System.out.print(".");
                    if (lineCount % 200000 == 0) {
                        System.out.println(" # " + lineCount);
                    }
                }
            }
            if (!batch.isEmpty()) {
                threadPool.execute(new BlastPWorker(batch, cBlastNrFacade, this.geneIdHashMap));
            }
        }
        threadPool.waitFor();
        System.out.println(" # " + lineCount);
        LOG.info("Blastp parsing done ({} lines) in {}s. ", lineCount,
                Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads the tab-separated binning file into the contig-to-binnings map.
     *
     * <p>Column 0 is the map key. Every further column becomes one {@code Binning} with the
     * fixed label "metabat" whose bins are the comma-separated values of that column. A key
     * that occurs more than once keeps the binnings of its last line.
     *
     * @param str path of the binning file
     * @throws Exception if the file cannot be read
     */
    private void parseBinning(String str) throws Exception {
        // try-with-resources: the reader used to be left open, even after a successful run.
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split("\t");
                ArrayList<Binning> binnings = new ArrayList<>();
                for (int col = 1; col < columns.length; col++) {
                    Binning binning = new Binning();
                    binning.label = "metabat";
                    for (String bin : columns[col].split(",")) {
                        binning.bins.add(bin);
                    }
                    binnings.add(binning);
                }
                this.contigToBin.put(columns[0], binnings);
            }
        }
    }
}
