package de.unibi.cebitec.emgb.datawarehouse.export.parser;

import com.sleepycat.persist.impl.Store;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.beans.CGoDataEntity;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.beans.CKeggDataEntity;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.beans.CPathwayEntity;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.beans.CPfamGoEntity;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.facades.CKeggDataFacade;
import de.unibi.cebitec.emgb.datawarehouse.cassandra.facades.CPfamGoFacade;
import de.unibi.cebitec.emgb.datawarehouse.export.Binning;
import de.unibi.cebitec.emgb.datawarehouse.export.Count;
import de.unibi.cebitec.emgb.datawarehouse.export.Coverage;
import de.unibi.cebitec.emgb.datawarehouse.export.DataObject;
import de.unibi.cebitec.emgb.datawarehouse.export.FoldChange;
import de.unibi.cebitec.emgb.datawarehouse.export.GoObject;
import de.unibi.cebitec.emgb.datawarehouse.export.Pathway;
import de.unibi.cebitec.emgb.datawarehouse.export.PfamObject;
import de.unibi.cebitec.emgb.datawarehouse.export.config.FileProperties;
import de.unibi.cebitec.emgb.datawarehouse.util.Time;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPOutputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:de/unibi/cebitec/emgb/datawarehouse/export/parser/IMGParser.class */
/**
 * Collects per-gene annotation data from a set of input files and writes one JSON document per
 * gene into a gzip-compressed output file.
 *
 * <p>{@link #run()} first reads the protein FASTA file, which creates one {@link DataObject} per
 * gene ID. Every later parse step looks genes up by that ID and enriches the existing object;
 * lines that refer to an unknown gene ID are reported and skipped.
 *
 * <p>Input files are read with the platform default charset (via {@link FileReader}); the output
 * is written as UTF-8.
 */
public class IMGParser {
    private static final Logger LOG = LoggerFactory.getLogger(IMGParser.class);

    /**
     * Prefix of lines that the tabular parsers skip. NOTE(review): this reuses a constant of an
     * unrelated library, presumably because its value is "#" - confirm.
     */
    private static final String COMMENT_PREFIX = Store.NAME_SEPARATOR;

    /** Key of the GFF attribute that carries the gene ID. */
    private static final String LOCUS_TAG_KEY = "locus_tag=";

    /** Number of FASTA records between two progress messages. */
    private static final int PROGRESS_INTERVAL = 10000;

    private final FileProperties props;
    private final String targetDir;
    private final HashMap<String, DataObject> geneIdHashMap = new HashMap<>();
    private final HashMap<String, List<Coverage>> coverageIdObjectHashMap = new HashMap<>();
    private final HashMap<String, String> keggIdToKoMap = new HashMap<>();
    private final HashMap<String, Integer> contigIdLengthMap = new HashMap<>();
    private final HashMap<String, ArrayList<Binning>> contigToBin = new HashMap<>();
    private final CPfamGoFacade pfamEntityFacade = new CPfamGoFacade();
    private final CKeggDataFacade keggEntityFacade = new CKeggDataFacade();

    /**
     * @param fileProperties locations of all input files plus the data set name
     * @param str directory the output file is written to
     */
    public IMGParser(FileProperties fileProperties, String str) {
        this.props = fileProperties;
        this.targetDir = str;
    }

    /**
     * Runs all parse steps and writes the result to {@code <targetDir>/<dataSet>.json.gz}, one
     * JSON document per line.
     *
     * <p>The order of the steps matters: the FASTA file creates the gene objects, and binning and
     * coverage data must be loaded before the GFF step because that step copies them onto each
     * gene through the gene's contig ID.
     *
     * @throws Exception if an input file cannot be read or parsed, or the output cannot be written
     */
    public void run() throws Exception {
        long startMillis = System.currentTimeMillis();
        parseFaa(this.props.getFaaPath());
        if (!this.props.getBinningPath().isEmpty()) {
            parseBinning(this.props.getBinningPath());
        }
        for (Map.Entry<String, String> countFile : this.props.getCountFiles().entrySet()) {
            parseCountData(countFile);
        }
        for (Map.Entry<String, String> coverageFile : this.props.getCoverageFiles().entrySet()) {
            parseCoverageData(coverageFile);
        }
        parsePhylodist(this.props.getLcaPath());
        if (!this.props.getKoPath().isEmpty()) {
            parseKoData(this.props.getKoPath(), this.keggEntityFacade);
        }
        parseProductNames(this.props.getProductNamesPath());
        parseGff(this.props.getGffPath());
        parsePfamResult(this.props.getPfamPath(), this.pfamEntityFacade);
        parseECs(this.props.getEcPath());
        for (Map.Entry<String, String> foldChangeFile : this.props.getDnafoldChangeFiles().entrySet()) {
            parseFoldChange(foldChangeFile);
        }
        LOG.info("All parse steps are done in {}.", Time.ms2humantime(System.currentTimeMillis() - startMillis));
        LOG.info("{}  entries have been found", Integer.valueOf(this.geneIdHashMap.size()));

        File outputFile = new File(this.targetDir, this.props.getDataSet() + ".json.gz");
        System.out.println("Writing to: " + outputFile.getAbsolutePath());
        // try-with-resources closes (and thereby flushes) the whole stream chain even when
        // serialising a gene fails, so no explicit periodic flush is needed.
        try (FileOutputStream fileOut = new FileOutputStream(outputFile);
                BufferedWriter writer = new BufferedWriter(
                        new OutputStreamWriter(new GZIPOutputStream(fileOut), "UTF-8"))) {
            for (DataObject gene : this.geneIdHashMap.values()) {
                writer.write(gene.toJson() + "\n");
            }
        }
        LOG.info("All done in {}.", Time.ms2humantime(System.currentTimeMillis() - startMillis));
    }

    /**
     * Reads a comma-separated fold change file and attaches one {@link FoldChange} per line to
     * the matching gene. The first line of the file is discarded (presumably a header row).
     * Column 1 holds the gene ID, optionally wrapped in double quotes.
     *
     * @param entry key: label under which the fold change is stored; value: path of the file
     * @throws Exception if the file cannot be read or a numeric column cannot be parsed
     */
    public void parseFoldChange(Map.Entry<String, String> entry) throws Exception {
        String path = entry.getValue();
        String label = entry.getKey();
        System.out.println("Parsing " + path);
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            reader.readLine();
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(",");
                String geneId = fields[1].replace("\"", "");
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    // Same policy as the other parse steps: report and skip instead of failing.
                    System.err.println("Gene ID could not be found during fold change parsing step " + geneId);
                    continue;
                }
                FoldChange foldChange = new FoldChange(
                        label,
                        Float.parseFloat(fields[8]),
                        Float.parseFloat(fields[2]),
                        Float.parseFloat(fields[3]),
                        Float.parseFloat(fields[9]),
                        Float.parseFloat(fields[7]),
                        Float.parseFloat(fields[13]));
                gene.foldchanges.put(label, foldChange);
            }
        }
    }

    /**
     * Reads a tab-separated count file (column 0: gene ID, column 1: integer count) and stores
     * the count on the matching gene. Comment lines are skipped.
     *
     * @param entry key: label under which the count is stored; value: path of the file
     * @throws Exception if the file cannot be read or a count is not an integer
     */
    public void parseCountData(Map.Entry<String, String> entry) throws Exception {
        System.out.println("Parsing " + entry.getValue());
        try (BufferedReader reader = new BufferedReader(new FileReader(entry.getValue()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(COMMENT_PREFIX)) {
                    continue;
                }
                String[] fields = line.split("\t");
                String geneId = fields[0];
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    System.err.println("Gene ID could not be found during Count data parsing step " + geneId);
                    continue;
                }
                gene.counts.put(entry.getKey(), new Count(entry.getKey(), Integer.parseInt(fields[1])));
            }
        }
    }

    /**
     * Reads a tab-separated coverage file and collects the {@link Coverage} records per contig
     * ID (column 0). Columns 1 and 2 are parsed as integers and column 3 as a double. The
     * records are attached to genes later, in {@link #parseGff(String)}. Comment lines are
     * skipped.
     *
     * @param entry key: label passed to each {@link Coverage}; value: path of the file
     * @throws Exception if the file cannot be read or a numeric column cannot be parsed
     */
    public void parseCoverageData(Map.Entry<String, String> entry) throws Exception {
        System.out.println("Parsing " + entry.getValue());
        try (BufferedReader reader = new BufferedReader(new FileReader(entry.getValue()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(COMMENT_PREFIX)) {
                    continue;
                }
                String[] fields = line.split("\t");
                String contigId = fields[0];
                Coverage coverage = new Coverage(
                        entry.getKey(),
                        Integer.parseInt(fields[1]),
                        Integer.parseInt(fields[2]),
                        Double.parseDouble(fields[3]));
                List<Coverage> coverages = this.coverageIdObjectHashMap.get(contigId);
                if (coverages == null) {
                    coverages = new ArrayList<>();
                    this.coverageIdObjectHashMap.put(contigId, coverages);
                }
                coverages.add(coverage);
            }
        }
    }

    /**
     * Reads a whitespace-separated Pfam result file (column 0: gene ID, column 3: HMM name,
     * column 4: HMM accession) and attaches each hit to the matching gene. GO terms that the
     * facade knows for the accession are added to the gene with origin {@code "PFAM"}. Comment
     * lines are skipped.
     *
     * @param str path of the Pfam result file
     * @param cPfamGoFacade lookup from {@code "Pfam:<accession>"} to GO data
     * @throws Exception if the file cannot be read
     */
    public void parsePfamResult(String str, CPfamGoFacade cPfamGoFacade) throws Exception {
        System.out.println("Parsing " + str);
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(COMMENT_PREFIX)) {
                    continue;
                }
                String[] fields = line.split("\\s+");
                String geneId = fields[0];
                String accession = fields[4];
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    System.err.println("Gene ID could not be found during Pfam parsing step " + geneId);
                    continue;
                }
                PfamObject pfam = new PfamObject();
                pfam.seq_id = geneId;
                pfam.hmm_acc = accession;
                pfam.hmm_name = fields[3];

                // The facade is queried without the part after the first dot of the accession.
                int dot = accession.indexOf(".");
                String lookupKey = "Pfam:" + (dot >= 0 ? accession.substring(0, dot) : accession);
                CPfamGoEntity goEntity = cPfamGoFacade.get(lookupKey);
                if (goEntity != null) {
                    for (Map.Entry<String, CGoDataEntity> goEntry : goEntity.getGo2goData().entrySet()) {
                        CGoDataEntity goData = goEntry.getValue();
                        GoObject go = new GoObject();
                        go.goId = goEntry.getKey();
                        go.goDescription = goData.getDescription();
                        go.goOrigin = "PFAM";
                        if (goData.getGosOnLineage() != null) {
                            go.goLineage.addAll(goData.getGosOnLineage());
                        }
                        gene.goMaps.put(go.goId, go);
                    }
                }
                gene.pfams.put(accession.trim(), pfam);
            }
        }
    }

    /**
     * Reads a protein FASTA file and creates one {@link DataObject} per record, keyed by the
     * header line without its first character. Sequence lines of a record are concatenated.
     *
     * <p>Any line containing {@code '>'} is treated as a header. A header that is directly
     * followed by another header (no sequence in between) is replaced by the later one. An empty
     * file creates no record.
     *
     * @param str path of the FASTA file
     * @throws Exception if the file cannot be read
     */
    public void parseFaa(String str) throws Exception {
        System.out.println("Parsing " + str);
        String geneId = "";
        StringBuilder sequence = new StringBuilder();
        boolean headerSeen = false;
        int stored = 0;
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains(">")) {
                    sequence.append(line);
                    continue;
                }
                if (sequence.length() > 0) {
                    storeProtein(geneId, sequence.toString());
                    sequence.setLength(0);
                    stored++;
                    if (stored % PROGRESS_INTERVAL == 0) {
                        System.out.println(stored + " sequences of file have been processed.");
                    }
                }
                geneId = line.substring(1);
                headerSeen = true;
            }
        }
        // The last record has no following header that would trigger storing it.
        if (headerSeen || sequence.length() > 0) {
            storeProtein(geneId, sequence.toString());
            stored++;
        }
        System.out.println(stored + " sequences found.");
        System.out.println("FAA parsing done.");
    }

    /** Creates the gene object for one FASTA record and registers it under its gene ID. */
    private void storeProtein(String geneId, String proteinSequence) {
        DataObject gene = new DataObject();
        gene.dataSet = this.props.getDataSet();
        gene.proteinSequence = proteinSequence;
        gene.geneId = geneId;
        gene.length = proteinSequence.length();
        this.geneIdHashMap.put(geneId, gene);
    }

    /**
     * Reads a nucleotide FASTA file and records the sequence length of every record, keyed by
     * the header line without its first character. Header detection follows the same rules as
     * {@link #parseFaa(String)}. An empty file records nothing.
     *
     * @param str path of the FASTA file
     * @throws Exception if the file cannot be read
     */
    public void parseFna(String str) throws Exception {
        System.out.println("Parsing " + str);
        String contigId = "";
        int length = 0;
        boolean headerSeen = false;
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains(">")) {
                    // Only the length is needed, so the sequence itself is never materialised.
                    length += line.length();
                    continue;
                }
                if (length > 0) {
                    this.contigIdLengthMap.put(contigId, Integer.valueOf(length));
                    length = 0;
                }
                contigId = line.substring(1);
                headerSeen = true;
            }
        }
        if (headerSeen || length > 0) {
            this.contigIdLengthMap.put(contigId, Integer.valueOf(length));
        }
        System.out.println("Getting parsing done.");
    }

    /**
     * Reads a tab-separated GFF file and copies location data onto the gene named by the
     * {@code locus_tag} attribute in column 8. Binning and coverage data previously collected
     * for the gene's contig (column 0) are attached as well.
     *
     * <p>Lines that contain {@code "exon"} or {@code "repeat_region"} anywhere, and comment
     * lines, are skipped. A score column that is not a number is stored as {@code 0}.
     *
     * @param str path of the GFF file
     * @throws Exception if the file cannot be read or start/stop are not integers
     */
    public void parseGff(String str) throws Exception {
        System.out.println("Parsing " + str);
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.contains("exon") || line.contains("repeat_region") || line.startsWith(COMMENT_PREFIX)) {
                    continue;
                }
                String[] fields = line.split("\t");
                String geneId = extractLocusTag(fields[8]);
                if (geneId == null) {
                    System.err.println("No locus_tag attribute found in GFF line: " + line);
                    continue;
                }
                DataObject gene = this.geneIdHashMap.get(geneId);
                if (gene == null) {
                    System.out.println(geneId + " not found");
                    continue;
                }
                gene.contigId = fields[0];
                ArrayList<Binning> binnings = this.contigToBin.get(gene.contigId);
                if (binnings != null) {
                    gene.binnings = binnings;
                }
                List<Coverage> coverages = this.coverageIdObjectHashMap.get(gene.contigId);
                if (coverages != null) {
                    for (Coverage coverage : coverages) {
                        gene.coverage.put(coverage.id, coverage);
                    }
                }
                gene.genePredictionTools = fields[1];
                gene.type = fields[2];
                gene.start = Integer.parseInt(fields[3]);
                gene.stop = Integer.parseInt(fields[4]);
                try {
                    gene.score = Float.parseFloat(fields[5]);
                } catch (NumberFormatException ignored) {
                    // Deliberate: a non-numeric score column is stored as 0.
                    gene.score = 0.0f;
                }
                gene.strand = fields[6];
                gene.frame = fields[7];
            }
        }
        System.out.println("GFF Parsing done");
    }

    /**
     * Reads a tab-separated taxonomy file (column 0: gene ID, column 4: semicolon-separated
     * lineage) and stores the eight lineage positions on the matching gene, from Kingdom down to
     * Strain. Ranks missing at the end of the lineage are stored as empty strings.
     *
     * @param str path of the taxonomy file
     * @throws Exception if the file cannot be read
     */
    public void parsePhylodist(String str) throws Exception {
        System.out.println("Parsing " + str);
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                DataObject gene = this.geneIdHashMap.get(fields[0]);
                if (gene == null) {
                    System.out.println("Gene ID could not be found during LCA parsing step " + fields[0]);
                    continue;
                }
                String[] lineage = fields[4].split(";");
                gene.Kingdom = rankAt(lineage, 0);
                gene.Phylum = rankAt(lineage, 1);
                gene.Class = rankAt(lineage, 2);
                gene.Order = rankAt(lineage, 3);
                gene.Family = rankAt(lineage, 4);
                gene.Genus = rankAt(lineage, 5);
                gene.Species = rankAt(lineage, 6);
                gene.Strain = rankAt(lineage, 7);
            }
        }
        System.out.println("LCA parsing done");
    }

    /**
     * Returns the trimmed lineage entry at {@code index}, or an empty string when the lineage is
     * shorter. {@code String.split} drops trailing empty strings, so a lineage whose last ranks
     * are empty yields a shorter array than the number of ranks.
     */
    private static String rankAt(String[] lineage, int index) {
        return index < lineage.length ? lineage[index].trim() : "";
    }

    /**
     * Reads a tab-separated KO assignment file (column 0: gene ID, column 2: KO identifier) and
     * stores the identifier on the matching gene as {@code "ko:<id>"}, where {@code <id>} is the
     * part of column 2 after its first colon. Pathways and EC numbers that the facade knows for
     * the identifier are added to the gene.
     *
     * @param str path of the KO assignment file
     * @param cKeggDataFacade lookup from KO identifier to pathway and EC data
     * @throws Exception if the file cannot be read
     */
    public void parseKoData(String str, CKeggDataFacade cKeggDataFacade) throws Exception {
        System.out.println("Parsing " + str);
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                DataObject gene = this.geneIdHashMap.get(fields[0]);
                if (gene == null) {
                    System.err.println("Could not find GeneID " + fields[0] + " during KO ID parsing step");
                    continue;
                }
                gene.ko = "ko:" + fields[2].substring(fields[2].indexOf(":") + 1);
                CKeggDataEntity keggData = cKeggDataFacade.get(gene.ko);
                if (keggData == null) {
                    continue;
                }
                if (keggData.getPathways() != null) {
                    for (CPathwayEntity source : keggData.getPathways().values()) {
                        Pathway pathway = new Pathway();
                        pathway.title = source.getTitle();
                        pathway.type = source.getType();
                        pathway.pathId = source.getPathId();
                        gene.pathways.put(pathway.pathId, pathway);
                    }
                }
                if (keggData.getEcIds() != null) {
                    gene.ecs.addAll(keggData.getEcIds());
                }
            }
        }
    }

    /**
     * Reads a tab-separated product name file (column 0: gene ID, column 1: product name) and
     * stores the name on the matching gene. Only the first name seen for a gene is kept.
     *
     * @param str path of the product name file
     * @throws Exception if the file cannot be read
     */
    public void parseProductNames(String str) throws Exception {
        System.out.println("Parsing " + str);
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                DataObject gene = this.geneIdHashMap.get(fields[0]);
                if (gene == null) {
                    System.out.println("Not found?" + fields[0]);
                    continue;
                }
                if (!gene.blastfound) {
                    gene.blastfound = true;
                    gene.salltitles = fields[1];
                }
            }
        }
        System.out.println("Product Names parsing done");
    }

    /**
     * Extracts the value of the {@code locus_tag} attribute from a GFF attribute column. The
     * value ends at the next {@code ';'} or, if there is none, at the end of the string.
     *
     * @param str GFF attribute column
     * @return the locus tag value
     * @throws IllegalArgumentException if {@code str} contains no {@code locus_tag} attribute
     */
    public String getGeneId(String str) {
        String locusTag = extractLocusTag(str);
        if (locusTag == null) {
            throw new IllegalArgumentException("No locus_tag attribute in: " + str);
        }
        return locusTag;
    }

    /** Returns the {@code locus_tag} value of a GFF attribute column, or null if it has none. */
    private static String extractLocusTag(String attributes) {
        int keyStart = attributes.indexOf(LOCUS_TAG_KEY);
        if (keyStart < 0) {
            return null;
        }
        int valueStart = keyStart + LOCUS_TAG_KEY.length();
        int valueEnd = attributes.indexOf(';', valueStart);
        // The attribute may be the last one in the column and then has no terminating ';'.
        return valueEnd < 0 ? attributes.substring(valueStart) : attributes.substring(valueStart, valueEnd);
    }

    /**
     * Reads a tab-separated binning file. Column 0 is the contig ID; every further column
     * becomes one {@link Binning} labelled {@code "metabat"} whose bins are the comma-separated
     * values of that column. A contig that occurs again replaces its earlier entry.
     */
    private void parseBinning(String str) throws Exception {
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                ArrayList<Binning> binnings = new ArrayList<>();
                for (int column = 1; column < fields.length; column++) {
                    Binning binning = new Binning();
                    binning.label = "metabat";
                    for (String bin : fields[column].split(",")) {
                        binning.bins.add(bin);
                    }
                    binnings.add(binning);
                }
                this.contigToBin.put(fields[0], binnings);
            }
        }
    }

    /**
     * Reads a tab-separated EC file (column 0: gene ID, column 2: EC number) and adds the EC
     * number to the matching gene. Lines for unknown gene IDs are skipped silently.
     */
    private void parseECs(String str) throws Exception {
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t");
                DataObject gene = this.geneIdHashMap.get(fields[0]);
                if (gene != null) {
                    gene.ecs.add(fields[2]);
                }
            }
        }
    }
}
