package org.forester.application;

import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.forester.io.parsers.FastaParser;
import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.sequence.Sequence;
import org.forester.util.EasyWriter;
import org.forester.util.ForesterUtil;

/* JADX WARN: Classes with same name are omitted:
  input_file:lib/forester_1028.jar:org/forester/application/aaa.class
 */
/* loaded from: input_file:org/forester/application/aaa.class */
/**
 * One-off command line utility that filters and de-duplicates the sequences of a FASTA file.
 *
 * <p>Processing steps, in order:
 * <ol>
 *   <li>Sequences shorter than {@link #MIN_LENGTH} are set aside.</li>
 *   <li>For every remaining sequence the gene name is taken from the first {@code GN=...} token of
 *       its identifier and an entry of the form
 *       {@code <separator><gene>|<identifier><newline><residues>} is built.</li>
 *   <li>Entries are sorted, then entries repeating an already seen gene/range combination, or an
 *       already seen residue string, are set aside.</li>
 *   <li>The surviving entries are written grouped by gene; members of a group with more than one
 *       entry are tagged {@code __<i>_OF_<n>}.</li>
 *   <li>The set-aside entries and a few counters are appended as a report.</li>
 * </ol>
 *
 * <p>Usage: {@code aaa [input_fasta [output_file]]}. Both arguments are optional and fall back to
 * the historical hard-coded locations.
 *
 * <p>NOTE(review): a missing {@code GN=} token or {@code [from-to]} range terminates the JVM via
 * {@code System.exit(-1)}, which bypasses the {@code finally} blocks below, so the output file may
 * be left incomplete in that case. This matches the previous behaviour.
 */
public class aaa {
    /** Captures the gene name that follows {@code GN=}, up to the next whitespace character. */
    public static final Pattern GN_PATTERN = Pattern.compile("GN=(\\S+)\\s");
    /** Captures a {@code from-to} range written in square brackets, e.g. {@code [12-98]}. */
    public static final Pattern RANGE_PATTERN = Pattern.compile("\\[(\\d+-\\d+)\\]");
    /** Sequences whose length is below this value are ignored. */
    public static final int MIN_LENGTH = 85;

    /** Input used when no first command line argument is given. */
    private static final String DEFAULT_INPUT_FILE = "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20_2.fasta";
    /** Output used when no second command line argument is given. */
    private static final String DEFAULT_OUTPUT_FILE = "aaa_out";

    /**
     * Program entry point.
     *
     * @param args optional: {@code args[0]} is the FASTA input file, {@code args[1]} the output
     *             file; missing arguments fall back to the built-in defaults
     */
    public static void main(String[] args) {
        final String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_FILE;
        final String outputFile = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_FILE;
        try {
            final EasyWriter writer = (EasyWriter) ForesterUtil.createEasyWriter(outputFile);
            try {
                System.out.println("STARTING...");
                final List<Sequence> sequences;
                final FileInputStream in = new FileInputStream(inputFile);
                try {
                    sequences = FastaParser.parse(in);
                } finally {
                    // The stream was previously never closed.
                    in.close();
                }
                process(sequences, writer);
                writer.flush();
            } finally {
                // Close the writer even when processing fails, so the file handle is not leaked.
                writer.close();
            }
            System.out.println("DONE ");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the whole filter / de-duplicate / report pipeline on already parsed sequences.
     *
     * @param sequences the parsed input sequences
     * @param writer    destination of the result and the report
     * @throws IOException if writing fails
     */
    private static void process(List<Sequence> sequences, EasyWriter writer) throws IOException {
        final List<Sequence> tooShort = new ArrayList<Sequence>();
        final List<String> entries = buildEntries(sequences, tooShort);
        // Every entry starts with the separator followed by the gene name, so sorting also
        // places entries of the same gene next to each other.
        Collections.sort(entries);

        final List<String> sameGeneAndRange = new ArrayList<String>();
        final List<String> sameResidues = new ArrayList<String>();
        final List<String> unique = new ArrayList<String>();
        final HashSet<String> seenGeneRanges = new HashSet<String>();
        final HashSet<String> seenResidues = new HashSet<String>();
        for (final String entry : entries) {
            final String range = requireGroup(RANGE_PATTERN, entry, "range");
            final String gene = requireGroup(GN_PATTERN, entry, "gene");
            if (!seenGeneRanges.add(gene + "_" + range)) {
                sameGeneAndRange.add(entry);
                continue;
            }
            // Second line of the entry is the residue string.
            final String residues = entry.split(IOUtils.LINE_SEPARATOR_UNIX)[1];
            if (seenResidues.add(residues)) {
                unique.add(entry);
            } else {
                sameResidues.add(entry);
            }
        }

        writeGroupedByGene(unique, writer);
        writeSection(writer, "Removed because same GN and region:", sameGeneAndRange);
        writeSection(writer, "Removed because identical mol sequence:", sameResidues);
        writeSection(writer, "Removed because too short:", tooShort);

        writer.println("");
        writer.println("");
        writer.println("initial:" + sequences.size());
        writer.println("ignored because shorter than " + MIN_LENGTH + "aa: " + tooShort.size());
        // The count is deliberately reported twice to keep the report format unchanged.
        writer.println("unique   : " + unique.size());
        writer.println("unique   : " + unique.size());
        writer.println("duplicate because gn and range same: " + sameGeneAndRange.size());
        writer.println("duplicate because mol seq same     : " + sameResidues.size());
    }

    /**
     * Builds one text entry per sufficiently long sequence and collects the short ones.
     *
     * @param sequences the parsed input sequences
     * @param tooShort  receives every sequence shorter than {@link #MIN_LENGTH}
     * @return the (unsorted) entries of all other sequences
     */
    private static List<String> buildEntries(List<Sequence> sequences, List<Sequence> tooShort) {
        final List<String> entries = new ArrayList<String>();
        for (final Sequence sequence : sequences) {
            if (sequence.getLength() < MIN_LENGTH) {
                tooShort.add(sequence);
                continue;
            }
            final String identifier = sequence.getIdentifier();
            final String gene = requireGroup(GN_PATTERN, identifier, "gene");
            entries.add(DomainArchitecture.NHX_SEPARATOR + gene + "|" + identifier
                    + IOUtils.LINE_SEPARATOR_UNIX + sequence.getMolecularSequenceAsString());
        }
        return entries;
    }

    /**
     * Writes the entries in runs of consecutive entries sharing the same gene name.
     *
     * @param unique entries, expected to be sorted so that equal genes are adjacent
     * @param writer destination
     * @throws IOException if writing fails
     */
    private static void writeGroupedByGene(List<String> unique, EasyWriter writer) throws IOException {
        List<String> group = new ArrayList<String>();
        String previousGene = null;
        for (final String entry : unique) {
            final String gene = requireGroup(GN_PATTERN, entry, "gene");
            if (previousGene != null && !previousGene.equals(gene)) {
                doit(group, writer);
                group = new ArrayList<String>();
            }
            previousGene = gene;
            group.add(entry);
        }
        // Flush the last (possibly empty) group.
        doit(group, writer);
    }

    /**
     * Writes two blank lines, a heading and then one line per item.
     *
     * @param writer  destination
     * @param heading heading line of the section
     * @param items   items to list; each is written via its {@code toString()}
     * @throws IOException if writing fails
     */
    private static void writeSection(EasyWriter writer, String heading, List<?> items) throws IOException {
        writer.println("");
        writer.println("");
        writer.println(heading);
        for (final Object item : items) {
            writer.println(item.toString());
        }
    }

    /**
     * Returns capture group 1 of the first match of {@code pattern} in {@code text}. If there is
     * no match, an error line is printed to standard output and the JVM exits with status -1.
     *
     * @param pattern pattern with at least one capture group
     * @param text    text to search
     * @param what    word used in the error message, e.g. {@code "gene"}
     * @return the captured text
     */
    private static String requireGroup(Pattern pattern, String text, String what) {
        final Matcher matcher = pattern.matcher(text);
        if (!matcher.find()) {
            System.out.println("ERROR: no " + what + " for: " + text);
            System.exit(-1);
        }
        return matcher.group(1);
    }

    /**
     * Writes one group of entries. A single entry is written unchanged; in larger groups each
     * entry gets {@code __<i>_OF_<n>} (1-based position and group size) inserted directly before
     * its first {@code '|'}. An empty group writes nothing.
     *
     * @param list       the entries of one group; each is expected to contain a {@code '|'}
     * @param easyWriter destination
     * @throws IOException if writing fails
     */
    private static void doit(List<String> list, EasyWriter easyWriter) throws IOException {
        final int size = list.size();
        if (size == 1) {
            easyWriter.println(list.get(0));
            return;
        }
        int position = 1;
        for (final String entry : list) {
            final String tag = "__" + position + "_OF_" + size;
            easyWriter.println(new StringBuilder(entry).insert(entry.indexOf("|"), tag).toString());
            position++;
        }
    }
}
