package org.solrmarc.tools;

import java.text.Normalizer;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.solrmarc.index.extractor.formatter.FieldFormatter;

/* loaded from: input_file:org/solrmarc/tools/DataUtil.class */
public class DataUtil {
    /** Alternation of the two-digit prefixes ("10" through "20") accepted as the start of a year. */
    private static final String TWO_DIGIT_PREFIXES = "(20|19|18|17|16|15|14|13|12|11|10)";

    /** A four-digit number starting with 1 or 2, enclosed in square brackets, e.g. {@code [1999]}. */
    private static final Pattern FOUR_DIGIT_PATTERN_BRACES = Pattern.compile("\\[[12]\\d{3}\\]");

    /** Same as above but with only the opening bracket required, e.g. {@code [1999}. */
    private static final Pattern FOUR_DIGIT_PATTERN_ONE_BRACE = Pattern.compile("\\[[12]\\d{3}");

    /** A bare four-digit number whose first two digits are one of {@link #TWO_DIGIT_PREFIXES}. */
    private static final Pattern FOUR_DIGIT_PATTERN_STARTING_WITH_1_2 =
            Pattern.compile(TWO_DIGIT_PREFIXES + "[0-9][0-9]");

    /** A lower-case letter {@code l} followed by three digits, e.g. {@code l999}. */
    private static final Pattern FOUR_DIGIT_PATTERN_OTHER_1 = Pattern.compile("l\\d{3}");

    /** The literal text {@code [19]} followed by two digits, e.g. {@code [19]72}. */
    private static final Pattern FOUR_DIGIT_PATTERN_OTHER_2 = Pattern.compile("\\[19\\]\\d{2}");

    /** A prefix, one digit, and then a digit, hyphen or question mark, e.g. {@code 197-} or {@code 197?}. */
    private static final Pattern FOUR_DIGIT_PATTERN_OTHER_3 =
            Pattern.compile(TWO_DIGIT_PREFIXES + "[0-9][-?0-9]");

    /**
     * The text {@code i.e. } followed by a four-digit year. The dots are unescaped regex
     * wildcards, so any single character is accepted in those two positions.
     */
    private static final Pattern FOUR_DIGIT_PATTERN_OTHER_4 =
            Pattern.compile("i.e. " + TWO_DIGIT_PREFIXES + "[0-9][0-9]");

    /** One or more digits, a space, then "BC" in any case with optional periods, e.g. {@code 300 B.C.}. */
    private static final Pattern BC_DATE_PATTERN = Pattern.compile("[0-9]+ [Bb][.]?[Cc][.]?");

    /** Any run of exactly four digits. */
    private static final Pattern FOUR_DIGIT_PATTERN = Pattern.compile("\\d{4}");

    protected static Logger logger = Logger.getLogger(DataUtil.class.getName());

    /** One or more characters from the Combining Diacritical Marks Unicode block. */
    private static final Pattern ACCENTS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /** A run of spaces and/or punctuation, with the predefined classes in Unicode mode. */
    private static final Pattern PUNCT_OR_SPACE =
            Pattern.compile("[ \\p{Punct}]+", Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Extracts a four-digit year from a free-text date string.
     *
     * <p>The candidate year is chosen by {@link #extractYearCandidate(String)}. A candidate
     * that is not a parseable integer, or that lies more than one year after the current
     * calendar year, is rejected. The outcome is logged at debug level either way.
     *
     * @param str the raw date text; {@code null} is treated as "no date found"
     * @return the four-digit year as a string, or {@code null} when no acceptable year is found
     */
    public static String cleanDate(String str) {
        String year = (str == null) ? null : extractYearCandidate(str);
        if (year != null) {
            try {
                int latestAllowedYear = Calendar.getInstance().get(Calendar.YEAR) + 1;
                if (Integer.parseInt(year) > latestAllowedYear) {
                    year = null;
                }
            } catch (NumberFormatException e) {
                // A candidate that is not a plain integer is treated as "no date".
                year = null;
            }
        }
        if (year != null) {
            logger.debug("Date : " + str + " mapped to : " + year);
        } else {
            logger.debug("No Date match: " + str);
        }
        return year;
    }

    /**
     * Tries the date patterns in priority order and returns the first year found.
     *
     * <p>Order: bracketed year, "i.e. YYYY", year with only an opening bracket, B.C. date
     * (which yields no year), bare year, year written with a leading letter {@code l},
     * "[19]YY", and finally a year whose last position is {@code -} or {@code ?}.
     *
     * @param str the raw date text; must not be {@code null}
     * @return the normalised candidate year, or {@code null} when nothing matches or the
     *         text is a B.C. date
     */
    private static String extractYearCandidate(String str) {
        Matcher m = FOUR_DIGIT_PATTERN_BRACES.matcher(str);
        if (m.find()) {
            return removeOuterBrackets(m.group());
        }
        m = FOUR_DIGIT_PATTERN_OTHER_4.matcher(str);
        if (m.find()) {
            // The match always ends with the four year digits; drop the "i.e. " lead-in.
            String matched = m.group();
            return matched.substring(matched.length() - 4);
        }
        m = FOUR_DIGIT_PATTERN_ONE_BRACE.matcher(str);
        if (m.find()) {
            return removeOuterBrackets(m.group());
        }
        if (BC_DATE_PATTERN.matcher(str).find()) {
            // B.C. dates deliberately produce no year and stop the search.
            return null;
        }
        m = FOUR_DIGIT_PATTERN_STARTING_WITH_1_2.matcher(str);
        if (m.find()) {
            return m.group();
        }
        m = FOUR_DIGIT_PATTERN_OTHER_1.matcher(str);
        if (m.find()) {
            // A leading letter 'l' stands in for the digit '1'.
            return m.group().replace('l', '1');
        }
        m = FOUR_DIGIT_PATTERN_OTHER_2.matcher(str);
        if (m.find()) {
            return m.group().replace("[", "").replace("]", "");
        }
        m = FOUR_DIGIT_PATTERN_OTHER_3.matcher(str);
        if (m.find()) {
            // An unknown final digit ('-' or '?') is rounded down to zero.
            return m.group().replace('-', '0').replace('?', '0');
        }
        return null;
    }

    /** Optional spaces followed by one trailing comma, slash, semicolon or colon. */
    private static final Pattern TRAILING_SEPARATOR = Pattern.compile(" *([,/;:])$");

    /** Text ending in "Jr." or "Sr.", whose final period must be kept. */
    private static final Pattern GENERATION_SUFFIX = Pattern.compile(".*[JS]r\\.$");

    /**
     * Text whose final period may be dropped: the period follows two word characters, two
     * letters, two word characters each optionally followed by a combining mark, or a
     * punctuation character.
     */
    private static final Pattern STRIPPABLE_PERIOD = Pattern.compile(
            ".*(\\w\\w|\\p{L}\\p{L}"
            + "|\\w\\p{InCombiningDiacriticalMarks}?\\w\\p{InCombiningDiacriticalMarks}?"
            + "|\\p{Punct})\\.$");

    /**
     * Repeatedly strips trailing punctuation and enclosing square brackets from a value
     * until a pass changes nothing or the value becomes empty.
     *
     * <p>Each pass trims whitespace, removes one trailing {@code , / ; :} (with any spaces
     * before it), removes a final period unless the text ends in "Jr." or "Sr." and provided
     * the period follows one of the contexts in {@link #STRIPPABLE_PERIOD}, and then applies
     * {@link #removeOuterBrackets(String)}.
     *
     * @param str the value to clean; may be {@code null}
     * @return the cleaned value, or {@code null} if {@code str} is {@code null}
     */
    public static String cleanData(String str) {
        if (str == null) {
            return null;
        }
        String current = str;
        String previous;
        do {
            previous = current;
            String candidate = TRAILING_SEPARATOR.matcher(current.trim()).replaceAll("");
            if (candidate.endsWith(".")
                    && !GENERATION_SUFFIX.matcher(candidate).matches()
                    && STRIPPABLE_PERIOD.matcher(candidate).matches()) {
                candidate = candidate.substring(0, candidate.length() - 1);
            }
            current = removeOuterBrackets(candidate);
        } while (current.length() != 0 && !current.equals(previous));
        return current;
    }

    /**
     * Applies {@link #cleanData(String)} to every element of a set.
     *
     * <p>Results are collected in a {@link LinkedHashSet}, so they keep the iteration order
     * of the input and values that become equal after cleaning collapse into one entry.
     *
     * @param set the values to clean; must not be {@code null}
     * @return a new set holding the cleaned values
     */
    public static Set<String> cleanData(Set<String> set) {
        Set<String> cleaned = new LinkedHashSet<String>();
        for (String value : set) {
            cleaned.add(cleanData(value));
        }
        return cleaned;
    }

    /**
     * Applies {@link #removeTrailingCharAndPeriod(String, String, String)} over and over
     * until the value stops changing or becomes empty.
     *
     * @param str  the value to clean; may be {@code null}
     * @param str2 regex describing the trailing characters to remove
     * @param str3 regex describing what must precede a trailing period for it to be removed
     * @return the fully stripped value, or {@code null} if {@code str} is {@code null}
     */
    public static String removeAllTrailingCharAndPeriod(String str, String str2, String str3) {
        if (str == null) {
            return null;
        }
        String current = str;
        while (true) {
            String before = current;
            current = removeTrailingCharAndPeriod(before.trim(), str2, str3);
            // Stop once nothing is left or a full pass made no difference.
            if (current.length() == 0 || current.equals(before)) {
                return current;
            }
        }
    }

    /**
     * Performs a single pass of trailing-character removal followed by trailing-period
     * removal.
     *
     * @param str  the value to clean; may be {@code null}
     * @param str2 regex passed to {@link #removeTrailingChar(String, String)}
     * @param str3 regex passed to {@link #removeTrailingPeriod(String, String)}
     * @return the stripped value, or {@code null} if {@code str} is {@code null}
     */
    public static String removeTrailingCharAndPeriod(String str, String str2, String str3) {
        if (str != null) {
            String withoutTrailingChar = removeTrailingChar(str, str2);
            return removeTrailingPeriod(withoutTrailingChar, str3);
        }
        return null;
    }

    /**
     * Trims the value and removes whatever the given regex matches at its end.
     *
     * <p>The regex is used as {@code str2 + "$"} with no grouping added, so a top-level
     * alternation in {@code str2} anchors only its last alternative.
     *
     * @param str  the value to clean; may be {@code null}
     * @param str2 regex for the trailing text to remove
     * @return the trimmed value without the trailing match, or {@code null} if {@code str}
     *         is {@code null}
     */
    public static String removeTrailingChar(String str, String str2) {
        if (str == null) {
            return null;
        }
        String anchoredRegex = str2 + "$";
        String trimmed = str.trim();
        return trimmed.replaceAll(anchoredRegex, "");
    }

    /**
     * Trims the value and drops a final period when the text before it matches the given
     * regex.
     *
     * <p>The period is removed only if the whole trimmed value matches
     * {@code ".*" + str2 + "\\.$"}; the result is trimmed again afterwards.
     *
     * @param str  the value to clean; may be {@code null}
     * @param str2 regex for the text that must immediately precede the period
     * @return the trimmed value, without its final period when the condition holds, or
     *         {@code null} if {@code str} is {@code null}
     */
    public static String removeTrailingPeriod(String str, String str2) {
        if (str == null) {
            return null;
        }
        String trimmed = str.trim();
        // The regex is only built and evaluated when there is a period to remove.
        boolean dropPeriod = trimmed.endsWith(".")
                && Pattern.matches(".*" + str2 + "\\.$", trimmed);
        if (!dropPeriod) {
            return trimmed;
        }
        String withoutPeriod = trimmed.substring(0, trimmed.length() - 1);
        return withoutPeriod.trim();
    }

    /**
     * Trims the value and strips square brackets that enclose it or dangle at one end.
     *
     * <ul>
     *   <li>{@code [text]} loses both brackets, provided there is no other {@code [} or
     *       {@code ]} inside.</li>
     *   <li>A leading {@code [} is removed when the value contains no {@code ]}.</li>
     *   <li>A trailing {@code ]} is removed when the value contains no {@code [}.</li>
     * </ul>
     * Any other value is only trimmed.
     *
     * @param str the value to clean; may be {@code null} or empty
     * @return the trimmed, unbracketed value; {@code null} or the empty string is returned
     *         as given
     */
    public static String removeOuterBrackets(String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        String text = str.trim();
        if (text.length() == 0) {
            return text;
        }

        int last = text.length() - 1;
        boolean opensWithBracket = text.startsWith("[");
        boolean closesWithBracket = text.charAt(last) == ']';

        if (opensWithBracket && closesWithBracket
                && text.indexOf('[', 1) < 0
                && text.lastIndexOf(']', last - 1) < 0) {
            // A single matched pair wrapping the whole value.
            text = text.substring(1, last);
        } else if (opensWithBracket && text.indexOf(']') < 0) {
            // Unclosed opening bracket.
            text = text.substring(1);
        } else if (closesWithBracket && text.indexOf('[') < 0) {
            // Unopened closing bracket.
            text = text.substring(0, last);
        }
        return text.trim();
    }

    /**
     * Converts a string to title case, one character at a time.
     *
     * <p>Rules, applied per character in this order:
     * <ol>
     *   <li>A lower-case character directly after a space, period, hyphen or slash (or at
     *       the very start) is upper-cased.</li>
     *   <li>A lower-case character directly after an apostrophe is upper-cased when at most
     *       two characters precede it in the current word (e.g. {@code o'brien}).</li>
     *   <li>If the input has no lower-case characters at all, every upper-case character
     *       that is not the first of its word is lower-cased.</li>
     * </ol>
     * Everything else is copied unchanged.
     *
     * @param str the text to convert; must not be {@code null}
     * @return the title-cased text
     */
    public static String toTitleCase(String str) {
        final String separators = " .-/";
        final char[] chars = str.toCharArray();

        // The input counts as "all caps" when it has no lower-case character.
        boolean allCaps = true;
        for (int k = 0; k < chars.length && allCaps; k++) {
            allCaps = !Character.isLowerCase(chars[k]);
        }

        StringBuilder out = new StringBuilder(chars.length);
        int charsIntoWord = 0;
        char previous = ' ';
        for (char ch : chars) {
            char emitted = ch;
            if (Character.isLowerCase(ch)) {
                boolean afterSeparator = separators.indexOf(previous) >= 0;
                boolean afterEarlyApostrophe = previous == '\'' && charsIntoWord <= 2;
                if (afterSeparator || afterEarlyApostrophe) {
                    emitted = Character.toUpperCase(ch);
                }
            } else if (allCaps && charsIntoWord >= 1 && Character.isUpperCase(ch)) {
                emitted = Character.toLowerCase(ch);
            }

            // A separator starts a new word; anything else extends the current one.
            charsIntoWord = separators.indexOf(ch) >= 0 ? 0 : charsIntoWord + 1;
            out.append(emitted);
            previous = ch;
        }
        return out.toString();
    }

    /**
     * Builds the set of cleaning options named in a parameter string.
     *
     * <p>An option is selected when its {@code toString()} text occurs anywhere in
     * {@code str}. Three shorthands are also recognised:
     * <ul>
     *   <li>{@code titleSortUpper} adds CLEAN_EACH, STRIP_ACCCENTS, STRIP_ALL_PUNCT,
     *       STRIP_INDICATOR and TO_UPPER;</li>
     *   <li>{@code titleSortLower} adds the same options with TO_LOWER instead of TO_UPPER;</li>
     *   <li>{@code clean} at the end of the string, or followed by any character other than
     *       {@code E}, adds CLEAN_EACH and CLEAN_END.</li>
     * </ul>
     *
     * @param str the parameter text to scan; must not be {@code null}
     * @return the selected options; empty when nothing matches
     */
    public static EnumSet<FieldFormatter.eCleanVal> getCleanValForParam(String str) {
        EnumSet<FieldFormatter.eCleanVal> selected = EnumSet.noneOf(FieldFormatter.eCleanVal.class);

        for (FieldFormatter.eCleanVal option : EnumSet.allOf(FieldFormatter.eCleanVal.class)) {
            if (str.contains(option.toString())) {
                selected.add(option);
            }
        }

        boolean sortUpper = str.contains("titleSortUpper");
        boolean sortLower = str.contains("titleSortLower");
        if (sortUpper || sortLower) {
            // Options shared by both title-sort shorthands.
            selected.add(FieldFormatter.eCleanVal.CLEAN_EACH);
            selected.add(FieldFormatter.eCleanVal.STRIP_ACCCENTS);
            selected.add(FieldFormatter.eCleanVal.STRIP_ALL_PUNCT);
            selected.add(FieldFormatter.eCleanVal.STRIP_INDICATOR);
        }
        if (sortUpper) {
            selected.add(FieldFormatter.eCleanVal.TO_UPPER);
        }
        if (sortLower) {
            selected.add(FieldFormatter.eCleanVal.TO_LOWER);
        }

        if (str.matches(".*clean([^E].*|$)")) {
            selected.add(FieldFormatter.eCleanVal.CLEAN_EACH);
            selected.add(FieldFormatter.eCleanVal.CLEAN_END);
        }
        return selected;
    }

    /**
     * Cleans a single value according to the given set of options.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>CLEAN_EACH: run {@link #cleanData(String)}.</li>
     *   <li>If none of STRIP_ACCCENTS, STRIP_ALL_PUNCT, TO_LOWER, TO_UPPER, TO_TITLECASE,
     *       STRIP_INDICATOR_1, STRIP_INDICATOR_2 or STRIP_INDICATOR is set, return here.</li>
     *   <li>STRIP_ACCCENTS: run {@link #stripAccents(String)}.</li>
     *   <li>STRIP_ALL_PUNCT: run {@link #stripAllPunct(String)}.</li>
     *   <li>Trim, unless UNTRIMMED is set.</li>
     *   <li>Apply the first of TO_LOWER, TO_UPPER, TO_TITLECASE that is set.</li>
     * </ol>
     * The STRIP_INDICATOR options only decide whether processing continues past step 2;
     * this method strips no indicator characters itself.
     *
     * @param str     the value to clean
     * @param enumSet the options to apply; must not be {@code null}
     * @return the cleaned value
     */
    public static String cleanByVal(String str, EnumSet<FieldFormatter.eCleanVal> enumSet) {
        String value = str;
        if (enumSet.contains(FieldFormatter.eCleanVal.CLEAN_EACH)) {
            value = cleanData(str);
        }

        boolean moreWorkRequested = enumSet.contains(FieldFormatter.eCleanVal.STRIP_ACCCENTS)
                || enumSet.contains(FieldFormatter.eCleanVal.STRIP_ALL_PUNCT)
                || enumSet.contains(FieldFormatter.eCleanVal.TO_LOWER)
                || enumSet.contains(FieldFormatter.eCleanVal.TO_UPPER)
                || enumSet.contains(FieldFormatter.eCleanVal.TO_TITLECASE)
                || enumSet.contains(FieldFormatter.eCleanVal.STRIP_INDICATOR_1)
                || enumSet.contains(FieldFormatter.eCleanVal.STRIP_INDICATOR_2)
                || enumSet.contains(FieldFormatter.eCleanVal.STRIP_INDICATOR);
        if (!moreWorkRequested) {
            return value;
        }

        if (enumSet.contains(FieldFormatter.eCleanVal.STRIP_ACCCENTS)) {
            value = stripAccents(value);
        }
        if (enumSet.contains(FieldFormatter.eCleanVal.STRIP_ALL_PUNCT)) {
            value = stripAllPunct(value);
        }
        if (!enumSet.contains(FieldFormatter.eCleanVal.UNTRIMMED)) {
            value = value.trim();
        }

        // Case options are mutually exclusive; lower wins over upper, upper over title case.
        if (enumSet.contains(FieldFormatter.eCleanVal.TO_LOWER)) {
            return value.toLowerCase();
        }
        if (enumSet.contains(FieldFormatter.eCleanVal.TO_UPPER)) {
            return value.toUpperCase();
        }
        if (enumSet.contains(FieldFormatter.eCleanVal.TO_TITLECASE)) {
            return toTitleCase(value);
        }
        return value;
    }

    /**
     * Replaces every run of spaces and punctuation with a single space.
     *
     * <p>Two passes are made. The first uses {@code PUNCT_OR_SPACE}, which is compiled with
     * the Unicode character-class flag. The second uses the same character class in the
     * default mode, so a character that only one of the two readings of
     * {@code \p{Punct}} treats as punctuation is still removed. Leading and trailing runs
     * become a single space; the result is not trimmed.
     *
     * <p>The default-mode pass is idempotent, so running it once on the output of the first
     * pass gives the same result as computing both passes on the input and comparing them.
     *
     * @param str the text to process; must not be {@code null}
     * @return the text with punctuation/space runs collapsed to single spaces
     */
    public static String stripAllPunct(String str) {
        String unicodePass = PUNCT_OR_SPACE.matcher(str).replaceAll(" ");
        return unicodePass.replaceAll("( |\\p{Punct})+", " ");
    }

    /**
     * Removes accents from a string.
     *
     * <p>The text is decomposed to Unicode NFD form and every run matched by
     * {@code ACCENTS} is deleted. Each remaining character is then passed to
     * {@code Utils.foldDiacriticLatinChar}; a non-zero result replaces the character, while
     * a zero result leaves it as it is.
     *
     * @param str the text to process; must not be {@code null}
     * @return the text without accents
     */
    public static String stripAccents(String str) {
        String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        String unaccented = ACCENTS.matcher(decomposed).replaceAll("");

        StringBuilder folded = new StringBuilder();
        boolean anyFolded = false;
        for (int i = 0; i < unaccented.length(); i++) {
            char original = unaccented.charAt(i);
            char replacement = Utils.foldDiacriticLatinChar(original);
            if (replacement == 0) {
                folded.append(original);
            } else {
                folded.append(replacement);
                anyFolded = true;
            }
        }
        // Reuse the existing string when folding changed nothing.
        return anyFolded ? folded.toString() : unaccented;
    }
}
