/*
 * Decompiled with CFR 0.152.
 */
package org.unicode.cldr.util;

import com.ibm.icu.dev.test.util.TransliteratorUtilities;
import com.ibm.icu.dev.test.util.UnicodeMap;
import com.ibm.icu.dev.test.util.UnicodeProperty;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;
import com.ibm.icu.util.ULocale;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class Segmenter {
    // Not referenced anywhere in the visible code.
    private static final boolean JDK4HACK = true;
    // Regex flag word; 44 == Pattern.COMMENTS (4) | Pattern.MULTILINE (8) | Pattern.DOTALL (32).
    // The Pattern.compile calls in this file pass the same value, 44, as a literal.
    public static final int REGEX_FLAGS = 44;
    // Not referenced anywhere in the visible code.
    private static final boolean SHOW_VAR_CONTENTS = false;
    // Sample map exposed by getSamples(); Builder.make() replaces it with the builder's own map.
    private UnicodeMap samples = new UnicodeMap();
    // When non-null, Builder.getInsertablePattern() restricts every set to these code points.
    public static final UnicodeSet DEBUG_RETAIN = null;
    // Rule id reported by breaksAt() when the position falls inside a surrogate pair (no break).
    public static final double NOBREAK_SUPPLEMENTARY = 0.1;
    // Rule id reported by breaksAt() for position 0 (break).
    public static final double BREAK_SOT = 0.2;
    // Rule id reported by breaksAt() for position == text.length() (break).
    public static final double BREAK_EOT = 0.3;
    // Rule id reported by breaksAt() when no rule matched (break).
    public static final double BREAK_ANY = 999.0;
    // Formats rule ids for the XML output; the static initializer sets its minimum fraction digits to 0.
    public static NumberFormat nf = NumberFormat.getInstance((ULocale)ULocale.ENGLISH);
    // Assigned in the static initializer: longer strings sort first, equal lengths by String.compareTo.
    static Comparator LONGEST_STRING_FIRST;
    // rules and orders are parallel lists: orders.get(i) is the Double id of rules.get(i).
    private List rules = new ArrayList(1);
    private List orders = new ArrayList(1);
    // Id of the rule that decided the most recent breaksAt() call.
    private double breakRule;
    // Assigned in the static initializer. Each row is { ruleSetName, line, line, ... };
    // make(factory, type) feeds the lines of the row named type to Builder.addLine().
    static String[][] cannedRules;

    /**
     * Creates a builder preloaded with one of the canned rule sets.
     *
     * @param factory property factory handed to the new {@link Builder}
     * @param type name of the rule set; compared with element 0 of each {@code cannedRules} row
     * @return the populated builder, or {@code null} when no row is named {@code type}
     */
    public static Builder make(UnicodeProperty.Factory factory, String type) {
        Builder builder = new Builder(factory);
        for (String[] ruleSet : cannedRules) {
            if (ruleSet[0].equals(type)) {
                // Element 0 is the name; every later element is one rule-file line.
                for (int lineIndex = 1; lineIndex < ruleSet.length; ++lineIndex) {
                    builder.addLine(ruleSet[lineIndex]);
                }
                return builder;
            }
        }
        return null;
    }

    /**
     * Decides whether there is a segment boundary immediately before {@code position}.
     * The id of the deciding rule is stored and can be read with {@link #getBreakRule()}.
     *
     * @param text text being segmented
     * @param position offset in UTF-16 code units, from 0 to {@code text.length()} inclusive
     * @return {@code true} for a break, {@code false} for no break
     */
    public boolean breaksAt(CharSequence text, int position) {
        // Start of text always breaks (0.2 is the value of BREAK_SOT).
        if (position == 0) {
            this.breakRule = 0.2;
            return true;
        }
        // End of text always breaks (0.3 is the value of BREAK_EOT).
        if (position == text.length()) {
            this.breakRule = 0.3;
            return true;
        }
        // Never split a surrogate pair (0.1 is the value of NOBREAK_SUPPLEMENTARY).
        boolean insideSurrogatePair = UTF16.isLeadSurrogate((char)text.charAt(position - 1))
                && UTF16.isTrailSurrogate((char)text.charAt(position));
        if (insideSurrogatePair) {
            this.breakRule = 0.1;
            return false;
        }
        // First rule with a definite verdict wins; 0 means "rule does not apply", 1 means "break".
        for (int ruleIndex = 0; ruleIndex < this.rules.size(); ++ruleIndex) {
            byte verdict = ((Rule)this.rules.get(ruleIndex)).matches(text, position);
            if (verdict != 0) {
                this.breakRule = (Double)this.orders.get(ruleIndex);
                return verdict == 1;
            }
        }
        // Nothing matched: break by default (999.0 is the value of BREAK_ANY).
        this.breakRule = 999.0;
        return true;
    }

    /**
     * Writes a single status value of 0 into the first slot of {@code ruleStatus}.
     *
     * @param ruleStatus output array; must have at least one element
     * @return the number of entries written, always 1
     */
    public int getRuleStatusVec(int[] ruleStatus) {
        final int entriesWritten = 1;
        ruleStatus[entriesWritten - 1] = 0;
        return entriesWritten;
    }

    /**
     * Appends a rule and its numeric id. Rules are tried by {@code breaksAt} in insertion order.
     *
     * @param order id of the rule
     * @param rule the rule itself
     */
    public void add(double order, Rule rule) {
        Double boxedOrder = Double.valueOf(order);
        this.orders.add(boxedOrder);
        this.rules.add(rule);
    }

    /**
     * Looks up the first rule registered under the given id.
     *
     * @param order id of the rule
     * @return the rule, or {@code null} when no rule has that id
     */
    public Rule get(double order) {
        int index = this.orders.indexOf(Double.valueOf(order));
        return index < 0 ? null : (Rule)this.rules.get(index);
    }

    /**
     * @return the id of the rule that decided the most recent {@code breaksAt} call
     */
    public double getBreakRule() {
        double lastDecidingRule = this.breakRule;
        return lastDecidingRule;
    }

    /**
     * Lists the rules without their resolved regular expressions.
     *
     * @return same as {@code toString(false)}
     */
    @Override
    public String toString() {
        final boolean showResolved = false;
        return this.toString(showResolved);
    }

    /**
     * Lists every rule as {@code <id>)<TAB><rule text>}, one per line, separated by CRLF.
     *
     * @param showResolved passed through to {@code Rule.toString(boolean)}; when {@code true}
     *        each rule also shows its resolved form
     * @return the listing, or an empty string when there are no rules
     */
    public String toString(boolean showResolved) {
        // StringBuilder avoids re-copying the whole listing on every appended rule.
        StringBuilder listing = new StringBuilder();
        for (int i = 0; i < this.rules.size(); ++i) {
            if (i != 0) {
                listing.append("\r\n");
            }
            listing.append(this.orders.get(i))
                    .append(")\t")
                    .append(((Rule)this.rules.get(i)).toString(showResolved));
        }
        return listing.toString();
    }

    /**
     * Tests whether the matcher's pattern matches a prefix of the text that starts at
     * {@code position}. The matcher is reset onto that tail, so it only sees the tail.
     *
     * @param matcher matcher to reuse; its input is replaced
     * @param text full text
     * @param position start of the tail, from 0 to {@code text.length()}
     * @return {@code true} when the pattern matches at the very start of the tail
     */
    static boolean matchAfter(Matcher matcher, CharSequence text, int position) {
        CharSequence tail = text.subSequence(position, text.length());
        matcher.reset(tail);
        return matcher.lookingAt();
    }

    /**
     * Tests whether the matcher's pattern matches the whole of the text that precedes
     * {@code position}. The matcher is reset onto that head, so it only sees the head.
     *
     * @param matcher matcher to reuse; its input is replaced
     * @param text full text
     * @param position end (exclusive) of the head, from 0 to {@code text.length()}
     * @return {@code true} when the pattern matches the entire head
     */
    static boolean matchBefore(Matcher matcher, CharSequence text, int position) {
        CharSequence head = text.subSequence(0, position);
        matcher.reset(head);
        return matcher.matches();
    }

    /**
     * @return the sample map held by this segmenter (the live object, not a copy)
     */
    public UnicodeMap getSamples() {
        UnicodeMap liveSamples = this.samples;
        return liveSamples;
    }

    static {
        // Allow rule ids to be printed with no fraction digits.
        nf.setMinimumFractionDigits(0);
        // Orders by descending string length; equal lengths fall back to String.compareTo.
        // Both arguments are compared through their toString() form.
        LONGEST_STRING_FIRST = new Comparator(){

            public int compare(Object arg0, Object arg1) {
                String left = arg0.toString();
                String right = arg1.toString();
                int leftLength = left.length();
                int rightLength = right.length();
                if (leftLength != rightLength) {
                    // The shorter string sorts after the longer one.
                    return leftLength < rightLength ? 1 : -1;
                }
                return left.compareTo(right);
            }
        };
        cannedRules = new String[][]{{"GraphemeClusterBreak", "$CR=\\p{Grapheme_Cluster_Break=CR}", "$LF=\\p{Grapheme_Cluster_Break=LF}", "$Control=\\p{Grapheme_Cluster_Break=Control}", "$Extend=\\p{Grapheme_Cluster_Break=Extend}", "$L=\\p{Grapheme_Cluster_Break=L}", "$V=\\p{Grapheme_Cluster_Break=V}", "$T=\\p{Grapheme_Cluster_Break=T}", "$LV=\\p{Grapheme_Cluster_Break=LV}", "$LVT=\\p{Grapheme_Cluster_Break=LVT}", "3) $CR  \t\u00d7  \t$LF", "4) ( $Control | $CR | $LF ) \t\u00f7", "5) \u00f7 \t( $Control | $CR | $LF )", "6) $L \t\u00d7 \t( $L | $V | $LV | $LVT )", "7) ( $LV | $V ) \t\u00d7 \t( $V | $T )", "8) ( $LVT | $T) \t\u00d7 \t$T", "9) \u00d7 \t$Extend"}, {"LineBreak", "# Variables", "$AI=\\p{Line_Break=Ambiguous}", "$AL=\\p{Line_Break=Alphabetic}", "$B2=\\p{Line_Break=Break_Both}", "$BA=\\p{Line_Break=Break_After}", "$BB=\\p{Line_Break=Break_Before}", "$BK=\\p{Line_Break=Mandatory_Break}", "$CB=\\p{Line_Break=Contingent_Break}", "$CL=\\p{Line_Break=Close_Punctuation}", "$CM=\\p{Line_Break=Combining_Mark}", "$CR=\\p{Line_Break=Carriage_Return}", "$EX=\\p{Line_Break=Exclamation}", "$GL=\\p{Line_Break=Glue}", "$H2=\\p{Line_Break=H2}", "$H3=\\p{Line_Break=H3}", "$HY=\\p{Line_Break=Hyphen}", "$ID=\\p{Line_Break=Ideographic}", "$IN=\\p{Line_Break=Inseparable}", "$IS=\\p{Line_Break=Infix_Numeric}", "$JL=\\p{Line_Break=JL}", "$JT=\\p{Line_Break=JT}", "$JV=\\p{Line_Break=JV}", "$LF=\\p{Line_Break=Line_Feed}", "$NL=\\p{Line_Break=Next_Line}", "$NS=\\p{Line_Break=Nonstarter}", "$NU=\\p{Line_Break=Numeric}", "$OP=\\p{Line_Break=Open_Punctuation}", "$PO=\\p{Line_Break=Postfix_Numeric}", "$PR=\\p{Line_Break=Prefix_Numeric}", "$QU=\\p{Line_Break=Quotation}", "$SA=\\p{Line_Break=Complex_Context}", "$SG=\\p{Line_Break=Surrogate}", "$SP=\\p{Line_Break=Space}", "$SY=\\p{Line_Break=Break_Symbols}", "$WJ=\\p{Line_Break=Word_Joiner}", "$XX=\\p{Line_Break=Unknown}", "$ZW=\\p{Line_Break=ZWSpace}", "# LB 1  Assign a line breaking class to each code point of the input. 
", "# Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm.", "# NOTE: CB is ok to fall through, but must handle others here.", "$AL=[$AI $AL $XX $SA $SG]", "# WARNING: Fixes for Rule 9", "# Treat X CM* as if it were X.", "# Where X is any line break class except SP, BK, CR, LF, NL or ZW.", "$X=$CM*", "$AI=($AI $X)", "$AL=($AL $X)", "$B2=($B2 $X)", "$BA=($BA $X)", "$BB=($BB $X)", "$CB=($CB $X)", "$CL=($CL $X)", "$CM=($CM $X)", "$CM=($CM $X)", "$GL=($GL $X)", "$H2=($H2 $X)", "$H3=($H3 $X)", "$HY=($HY $X)", "$ID=($ID $X)", "$IN=($IN $X)", "$IS=($IS $X)", "$JL=($JL $X)", "$JT=($JT $X)", "$JV=($JV $X)", "$NS=($NS $X)", "$NU=($NU $X)", "$OP=($OP $X)", "$PO=($PO $X)", "$PR=($PR $X)", "$QU=($QU $X)", "$SA=($SA $X)", "$SG=($SG $X)", "$SY=($SY $X)", "$WJ=($WJ $X)", "$XX=($XX $X)", "# OUT OF ORDER ON PURPOSE", "# LB 10  Treat any remaining combining mark as AL.", "$AL=($AL | ^ $CM | (?<=[$SP $BK $CR $LF $NL $ZW]) $CM)", "# LB 4  Always break after hard line breaks (but never between CR and LF).", "4) $BK \u00f7", "# LB 5  Treat CR followed by LF, as well as CR, LF and NL as hard line breaks.", "5.01) $CR \u00d7 $LF", "5.02) $CR \u00f7", "5.03) $LF \u00f7", "5.04) $NL \u00f7", "# LB 6  Do not break before hard line breaks.", "6) \u00d7 ( $BK | $CR | $LF | $NL )", "# LB 7  Do not break before spaces or zero-width space.", "7.01) \u00d7 $SP", "7.02) \u00d7 $ZW", "# LB 8  Break after zero-width space.", "8) $ZW \u00f7", "# LB 9  Do not break a combining character sequence; treat it as if it has the LB class of the base character", "# in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.)", "9) [^$SP $BK $CR $LF $NL $ZW] \u00d7 $CM", "#WARNING: this is done by modifying the variable values for all but SP.... 
That is, $AL is really ($AI $CM*)!", "# LB 11  Do not break before or after WORD JOINER and related characters.", "11.01) \u00d7 $WJ", "11.02) $WJ \u00d7", "# LB 12  Do not break before or after NBSP and related characters.", "12.01) [^$SP] \u00d7 $GL", "12.02) $GL \u00d7", "# LB 13  Do not break before \u2018]\u2019 or \u2018!\u2019 or \u2018;\u2019 or \u2018/\u2019, even after spaces.", "# Using customization 7.", "13.01) [^$NU] \u00d7 $CL", "13.02) \u00d7 $EX", "13.03) [^$NU] \u00d7 $IS", "13.04) [^$NU] \u00d7 $SY", "#LB 14  Do not break after \u2018[\u2019, even after spaces.", "14) $OP $SP* \u00d7", "# LB 15  Do not break within \u2018\"[\u2019, even with intervening spaces.", "15) $QU $SP* \u00d7 $OP", "# LB 16  Do not break within \u2018]h\u2019, even with intervening spaces.", "16) $CL $SP* \u00d7 $NS", "# LB 17  Do not break within \u2018\u2014\u2014\u2019, even with intervening spaces.", "17) $B2 $SP* \u00d7 $B2", "# LB 18  Break after spaces.", "18) $SP \u00f7", "# LB 19  Do not break before or after \u2018\"\u2019.", "19.01)  \u00d7 $QU", "19.02) $QU \u00d7", "# LB 20  Break before and after unresolved CB.", "20.01)  \u00f7 $CB", "20.02) $CB \u00f7", "# LB 21  Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents.", "21.01) \u00d7 $BA", "21.02) \u00d7 $HY", "21.03) \u00d7 $NS", "21.04) $BB \u00d7", "# LB 22  Do not break between two ellipses, or between letters or numbers and ellipsis.", "22.01) $AL \u00d7 $IN", "22.02) $ID \u00d7 $IN", "22.03) $IN \u00d7 $IN", "22.04) $NU \u00d7 $IN", "# LB 23  Do not break within \u2018a9\u2019, \u20183a\u2019, or \u2018H%\u2019.", "23.01) $ID \u00d7 $PO", "23.02) $AL \u00d7 $NU", "23.03) $NU \u00d7 $AL", "# LB 24  Do not break between prefix and letters or ideographs.", "24.01) $PR \u00d7 $ID", "24.02) $PR \u00d7 $AL", "24.03) $PO \u00d7 $AL", "# Using customization 7", "# LB 18  Do not break between the following pairs of classes.", "# LB 
18-alternative: $PR? ( $OP | $HY )? $NU ($NU | $SY | $IS)* $CL? $PO?", "# Insert \u00d7 every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after ", "25.01) ($PR | $PO) \u00d7 ( $OP | $HY )? $NU", "25.02) ( $OP | $HY ) \u00d7 $NU", "25.03) $NU \u00d7 ($NU | $SY | $IS)", "25.04) $NU ($NU | $SY | $IS)* \u00d7 ($NU | $SY | $IS | $CL)", "25.05) $NU ($NU | $SY | $IS)* $CL? \u00d7 ($PO | $PR)", "#LB 26 Do not break a Korean syllable.", "26.01) $JL  \u00d7 $JL | $JV | $H2 | $H3", "26.02) $JV | $H2 \u00d7 $JV | $JT", "26.03) $JT | $H3 \u00d7 $JT", "# LB 27 Treat a Korean Syllable Block the same as ID.", "27.01) $JL | $JV | $JT | $H2 | $H3 \u00d7 $IN", "27.02) $JL | $JV | $JT | $H2 | $H3  \u00d7 $PO", "27.03) $PR \u00d7 $JL | $JV | $JT | $H2 | $H3", "# LB 28  Do not break between alphabetics (\"at\").", "28) $AL \u00d7 $AL", "# LB 29  Do not break between numeric punctuation and alphabetics (\"e.g.\").", "29) $IS \u00d7 $AL", "# LB 30  Do not break between letters, numbers or ordinary symbols and opening or closing punctuation.", "30.01) ($AL | $NU) \u00d7 $OP", "30.02) $CL \u00d7 ($AL | $NU)"}, {"SentenceBreak", "# GC stuff", "$GCCR=\\p{Grapheme_Cluster_Break=CR}", "$GCLF=\\p{Grapheme_Cluster_Break=LF}", "$GCControl=\\p{Grapheme_Cluster_Break=Control}", "$GCExtend=\\p{Grapheme_Cluster_Break=Extend}", "# Normal variables", "$Format=\\p{Sentence_Break=Format}", "$Sep=\\p{Sentence_Break=Sep}", "$Sp=\\p{Sentence_Break=Sp}", "$Lower=\\p{Sentence_Break=Lower}", "$Upper=\\p{Sentence_Break=Upper}", "$OLetter=\\p{Sentence_Break=OLetter}", "$Numeric=\\p{Sentence_Break=Numeric}", "$ATerm=\\p{Sentence_Break=ATerm}", "$STerm=\\p{Sentence_Break=STerm}", "$Close=\\p{Sentence_Break=Close}", "$Any=.", "# Expresses the negation in rule 8; can't do this with normal regex, but works with UnicodeSet, which is all we need.", "# $ATerm and $Sterm are temporary, to match ICU until UTC decides.", "# WARNING: For Rule 5, 
now add format and extend to everything but Sep", "$X=[$Format $GCExtend]*", "$Sp=(($Sp | [$Sp - $GCControl] $GCExtend*) $Format*)", "$Lower=($Lower $X)", "$Upper=($Upper $X)", "$OLetter=($OLetter $X)", "$Numeric=($Numeric $X)", "$ATerm=($ATerm $X)", "$STerm=($STerm $X)", "$Close=($Close $X)", "# Do not break within CRLF", "3) $GCCR  \t\u00d7  \t$GCLF", "# Break after paragraph separators.", "4) $Sep  \t\u00f7", "# Ignore Format and Extend characters, except when they appear at the beginning of a region of text.", "# (See Section 6.2 Grapheme Cluster and Format Rules.)", "# WARNING: Implemented as don't break before format (except after linebreaks),", "# AND add format and extend in all variables definitions that appear after this point!", "5) [^$Sep] \u00d7 [$Format $GCExtend]", "# Do not break after ambiguous terminators like period, if immediately followed by a number or lowercase letter,", "# is between uppercase letters, or if the first following letter (optionally after certain punctuation) is lowercase.", "# For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence.", "6) $ATerm \t\u00d7 \t$Numeric", "7) $Upper $ATerm \t\u00d7 \t$Upper", "8) $ATerm $Close* $Sp* \t\u00d7 \t[^$OLetter $Upper $Lower $Sep]* $Lower", "8.1) ($STerm | $ATerm) $Close* $Sp* \t\u00d7 \t($STerm | $ATerm)", "#Break after sentence terminators, but include closing punctuation, trailing spaces, and (optionally) a paragraph separator.", "9) ( $STerm | $ATerm ) $Close* \t\u00d7 \t( $Close | $Sp | $Sep )", "# Note the fix to $Sp*, $Sep?", "10) ( $STerm | $ATerm ) $Close* $Sp* \t\u00d7 \t( $Sp | $Sep )", "11) ( $STerm | $ATerm ) $Close* $Sp* $Sep? 
\u00f7", "#Otherwise, do not break", "12) \u00d7 \t$Any"}, {"WordBreak", "# GC stuff", "$GCCR=\\p{Grapheme_Cluster_Break=CR}", "$GCLF=\\p{Grapheme_Cluster_Break=LF}", "$GCControl=\\p{Grapheme_Cluster_Break=Control}", "$GCExtend=\\p{Grapheme_Cluster_Break=Extend}", "$Sep=\\p{Sentence_Break=Sep}", "# Now normal variables", "$Format=\\p{Word_Break=Format}", "$Katakana=\\p{Word_Break=Katakana}", "$ALetter=\\p{Word_Break=ALetter}", "$MidLetter=\\p{Word_Break=MidLetter}", "$MidNum=\\p{Word_Break=MidNum}", "$Numeric=\\p{Word_Break=Numeric}", "$ExtendNumLet=\\p{Word_Break=ExtendNumLet}", "# WARNING: For Rule 4: Fixes for GC, Format", "# Add format and extend to everything", "$X=[$Format $GCExtend]*", "$Katakana=($Katakana $X)", "$ALetter=($ALetter $X)", "$MidLetter=($MidLetter $X)", "$MidNum=($MidNum $X)", "$Numeric=($Numeric $X)", "$ExtendNumLet=($ExtendNumLet $X)", "3) $GCCR  \t\u00d7  \t$GCLF", "# Ignore Format and Extend characters, except when they appear at the beginning of a region of text.", "# (See Section 6.2 Grapheme Cluster and Format Rules.)", "# WARNING: Implemented as don't break before format (except after linebreaks),", "# AND add format and extend in all variables definitions that appear after this point!", "4) [^ $Sep ] \u00d7 [$Format $GCExtend]", "# Vanilla rules", "5)$ALetter  \t\u00d7  \t$ALetter", "6)$ALetter \t\u00d7 \t$MidLetter $ALetter", "7)$ALetter $MidLetter \t\u00d7 \t$ALetter", "8)$Numeric \t\u00d7 \t$Numeric", "9)$ALetter \t\u00d7 \t$Numeric", "10)$Numeric \t\u00d7 \t$ALetter", "11)$Numeric $MidNum \t\u00d7 \t$Numeric", "12)$Numeric \t\u00d7 \t$MidNum $Numeric", "13)$Katakana \t\u00d7 \t$Katakana", "13.1)($ALetter | $Numeric | $Katakana | $ExtendNumLet) \t\u00d7 \t$ExtendNumLet", "13.2)$ExtendNumLet \t\u00d7 \t($ALetter | $Numeric | $Katakana)"}};
    }

    public static class Builder {
        // Source of Unicode properties; consulted by MyXSymbolTable.applyPropertyAlias().
        private UnicodeProperty.Factory propFactory;
        // Symbol table passed to the UnicodeSet constructors in addVariable() and replaceVariables().
        private UnicodeSet.XSymbolTable symbolTable;
        // XML lines (flushed comments and <variable> elements) in the order they were added.
        private List rawVariables = new ArrayList();
        // Double id -> XML line (rule element or comment); TreeMap keeps them sorted by id.
        private Map xmlRules = new TreeMap();
        // Double id -> HTML-escaped rule text; pre-seeded by the constructor with three fixed entries.
        private Map htmlRules = new TreeMap();
        // Comments seen since the last variable or rule, already wrapped as "<!-- ... -->".
        private List lastComments = new ArrayList();
        // Code point -> names of the variables whose sets contain it; handed to the Segmenter by make().
        private UnicodeMap samples = new UnicodeMap();
        // Used by addRule() to collapse each whitespace run in a rule line to one space.
        private transient Matcher whiteSpace = Pattern.compile("\\s+", 44).matcher("");
        // Accepts "$" followed by a letter and then any number of letters/digits.
        private transient Matcher identifierMatcher = Pattern.compile("[$]\\p{Alpha}\\p{Alnum}*", 44).matcher("");
        // addRule() rejects any rule line in which this pattern is found.
        private transient Matcher brokenIdentifierMatcher = Pattern.compile("[^$\\p{Alpha}]\\p{Alnum}", 44).matcher("");
        // When true, addVariable() prints diagnostics about the samples it finds to System.out.
        static boolean SHOW_SAMPLES = false;
        // Merges variable names in the samples map (joins differing names with "_").
        static MyComposer myComposer = new MyComposer();
        // Variable name (including "$") -> resolved value; keys iterate longest name first.
        private Map variables = new TreeMap(LONGEST_STRING_FIRST);
        // Double id -> Rule; TreeMap keeps them sorted by id for make().
        private Map rules = new TreeMap();
        // Scratch parse position reused by the UnicodeSet parsing calls.
        transient ParsePosition parsePosition = new ParsePosition(0);
        // Code points that JavaRegexShower writes as \\uXXXX escapes.
        static UnicodeSet JavaRegex_uxxx = new UnicodeSet("[[:White_Space:][:defaultignorablecodepoint:]#]");
        // Code points that JavaRegexShower writes with a leading backslash.
        static UnicodeSet JavaRegex_slash = new UnicodeSet("[[:Pattern_White_Space:]\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]");
        // Renders one code point for use inside a regex character class:
        // members of JavaRegex_uxxx become \\uXXXX escapes (a surrogate pair above U+FFFF),
        // members of JavaRegex_slash get a leading backslash, everything else is literal.
        static CodePointShower JavaRegexShower = new CodePointShower(){

            public String show(int codePoint) {
                if (!Builder.JavaRegex_uxxx.contains(codePoint)) {
                    String literal = UTF16.valueOf((int)codePoint);
                    return Builder.JavaRegex_slash.contains(codePoint) ? "\\" + literal : literal;
                }
                if (codePoint <= 65535) {
                    return "\\u" + Utility.hex((long)codePoint);
                }
                // Supplementary code point: emit both UTF-16 halves as separate escapes.
                char lead = (char)UTF16.getLeadSurrogate((int)codePoint);
                char trail = (char)UTF16.getTrailSurrogate((int)codePoint);
                return "\\u" + Utility.hex(lead) + "\\u" + Utility.hex(trail);
            }
        };

        public Builder(UnicodeProperty.Factory factory) {
            this.propFactory = factory;
            this.symbolTable = new MyXSymbolTable();
            this.htmlRules.put(new Double(0.2), "sot \u00f7");
            this.htmlRules.put(new Double(0.3), "\u00f7 eot");
            this.htmlRules.put(new Double(999.0), "\u00f7 Any");
        }

        /**
         * Renders the collected variables, rules and trailing comments as a
         * {@code <segmentation>} XML fragment with CRLF line ends.
         *
         * @param testName value of the {@code type} attribute
         * @param indent prefix put in front of every line
         * @return the XML fragment
         */
        public String toString(String testName, String indent) {
            final String eol = "\r\n";
            final String oneDeeper = indent + "\t";
            final String twoDeeper = indent + "\t\t";
            StringBuilder xml = new StringBuilder();
            xml.append(indent + "<segmentation type=\"" + testName + "\">").append(eol);
            xml.append(oneDeeper + "<variables>").append(eol);
            for (Object variableLine : this.rawVariables) {
                xml.append(twoDeeper).append(variableLine).append(eol);
            }
            xml.append(oneDeeper + "</variables>").append(eol);
            xml.append(oneDeeper + "<segmentRules>").append(eol);
            // values() of the TreeMap come back in ascending key (rule id) order.
            for (Object ruleLine : this.xmlRules.values()) {
                xml.append(twoDeeper).append(ruleLine).append(eol);
            }
            xml.append(oneDeeper + "</segmentRules>").append(eol);
            // Comments that were never attached to a variable or rule go last.
            for (Object comment : this.lastComments) {
                xml.append(oneDeeper).append(comment).append(eol);
            }
            xml.append(indent + "</segmentation>").append(eol);
            return xml.toString();
        }

        /**
         * Processes one line of a rule file. Lines are classified in this order:
         * <ul>
         * <li>{@code show ...} - prints the text with variables replaced to System.out;</li>
         * <li>{@code # ...} - remembered as a pending XML comment;</li>
         * <li>anything containing {@code =} - a variable definition {@code $name=value};</li>
         * <li>otherwise - a rule of the form {@code id) before (U+00F7 or U+00D7) after}.</li>
         * </ul>
         *
         * @param line the line to process
         * @return {@code true} only when the line added a rule
         * @throws IllegalArgumentException when a rule line has no parsable {@code id)} prefix
         *         or contains neither U+00F7 nor U+00D7
         */
        public boolean addLine(String line) {
            if (line.startsWith("show")) {
                line = line.substring(4).trim();
                System.out.println("# " + line + ": ");
                System.out.println("\t" + this.replaceVariables(line));
                return false;
            }
            if (line.startsWith("#")) {
                this.lastComments.add("<!-- " + line.substring(1).trim() + " -->");
                return false;
            }
            int equalsAt = line.indexOf('=');
            if (equalsAt >= 0) {
                this.addVariable(line.substring(0, equalsAt).trim(), line.substring(equalsAt + 1).trim());
                return false;
            }
            // Rule line: the id is everything up to the first ')'.
            int parenAt = line.indexOf(')');
            if (parenAt < 0) {
                throw new IllegalArgumentException("Rule must be of form '1)...': " + line);
            }
            Double order;
            try {
                order = Double.valueOf(Double.parseDouble(line.substring(0, parenAt).trim()));
            }
            catch (NumberFormatException e) {
                // Keep the parse failure as the cause so the bad id can be diagnosed.
                throw new IllegalArgumentException("Rule must be of form '1)...': " + line, e);
            }
            String ruleText = line.substring(parenAt + 1).trim();
            // U+00F7 (division sign) marks a break; U+00D7 (multiplication sign) marks no break.
            byte breaks = 1;
            int operatorAt = ruleText.indexOf('\u00f7');
            if (operatorAt < 0) {
                operatorAt = ruleText.indexOf('\u00d7');
                if (operatorAt < 0) {
                    throw new IllegalArgumentException("Couldn't find =, \u00f7, or \u00d7");
                }
                breaks = -1;
            }
            this.addRule(order, ruleText.substring(0, operatorAt).trim(), breaks, ruleText.substring(operatorAt + 1).trim(), ruleText);
            return true;
        }

        /**
         * Defines a variable. Pending comments are flushed into the XML variable list, the raw
         * definition is recorded as XML, the value has earlier variables substituted, and the
         * resolved value is stored under {@code name}.
         *
         * @param name variable name, which must look like {@code $id}
         * @param value unresolved value (may refer to earlier variables)
         * @return this builder
         * @throws IllegalArgumentException when {@code name} is not a valid identifier
         * @throws java.util.regex.PatternSyntaxException when the resolved value is not a valid regex
         */
        Builder addVariable(String name, String value) {
            if (!this.lastComments.isEmpty()) {
                this.rawVariables.addAll(this.lastComments);
                this.lastComments.clear();
            }
            String xmlValue = TransliteratorUtilities.toXML.transliterate(value);
            this.rawVariables.add("<variable id=\"" + name + "\">" + xmlValue + "</variable>");
            boolean wellFormedName = this.identifierMatcher.reset(name).matches();
            if (!wellFormedName) {
                throw new IllegalArgumentException("Variable name must be $id: '" + name + "'");
            }
            String resolved = this.replaceVariables(value);
            try {
                // Samples are only recorded when the whole value parses as one non-empty UnicodeSet.
                this.parsePosition.setIndex(0);
                UnicodeSet valueSet = new UnicodeSet(resolved, this.parsePosition, (SymbolTable)this.symbolTable);
                boolean fullyParsed = this.parsePosition.getIndex() == resolved.length();
                if (fullyParsed && valueSet.size() != 0) {
                    String sampleName = name.startsWith("$") ? name.substring(1) : name;
                    Builder.composeWith(this.samples, valueSet, sampleName, myComposer);
                    if (SHOW_SAMPLES) {
                        System.out.println("Samples for: " + name + " = " + resolved);
                        System.out.println("\t" + valueSet);
                    }
                } else if (SHOW_SAMPLES) {
                    if (!fullyParsed) {
                        System.out.println(this.parsePosition.getIndex() + ", " + resolved.length() + " -- No samples for: " + name + " = " + resolved);
                    } else {
                        System.out.println("Empty -- No samples for: " + name + " = " + resolved);
                    }
                }
            }
            catch (Exception ignored) {
                // Any failure while gathering samples is swallowed; the value is still
                // validated as a regex below.
            }
            // Compile only to validate; a bad pattern throws here.
            Pattern.compile(resolved, 44);
            this.variables.put(name, resolved);
            return this;
        }

        /**
         * For every code point in {@code set}, combines the value currently stored in
         * {@code target} with {@code value} through {@code composer} and stores the result
         * when it differs from what was there.
         *
         * @param target map to update in place
         * @param set code points to visit
         * @param value value to merge in
         * @param composer merge function
         * @return {@code target}
         */
        public static UnicodeMap composeWith(UnicodeMap target, UnicodeSet set, Object value, UnicodeMap.Composer composer) {
            UnicodeSetIterator codePoints = new UnicodeSetIterator(set);
            while (codePoints.next()) {
                int codePoint = codePoints.codepoint;
                Object existing = target.getValue(codePoint);
                Object merged = composer.compose(codePoint, existing, value);
                boolean unchanged = existing == merged || (existing != null && existing.equals(merged));
                if (!unchanged) {
                    target.put(codePoint, merged);
                }
            }
            return target;
        }

        /**
         * Diagnostic helper: parses {@code value} as a UnicodeSet and compiles growing subsets of
         * it (everything up to each successive element) as Java regex character classes, so the
         * first element that breaks compilation is reported.
         *
         * @param value UnicodeSet pattern to probe
         * @throws IllegalArgumentException when one of the generated patterns fails to compile;
         *         the message marks the error position with {@code <<<>>>} when it is known
         */
        private void findRegexProblem(String value) {
            UnicodeSet us = new UnicodeSet(value);
            String parsing = null;
            try {
                for (int i = 0; i < us.size(); ++i) {
                    UnicodeSet temp = new UnicodeSet(us).retain(0, us.charAt(i));
                    parsing = this.getInsertablePattern(temp);
                    Pattern.compile(parsing, 44).matcher("");
                }
            }
            catch (PatternSyntaxException e) {
                // getIndex() is -1 when the position is unknown; substring() would then throw
                // and hide the real error, so fall back to the unmarked pattern.
                int index = e.getIndex();
                String marked = (index < 0 || index > parsing.length())
                        ? parsing
                        : parsing.substring(0, index) + "<<<>>>" + parsing.substring(index);
                throw new IllegalArgumentException("Can't parse: " + marked, e);
            }
        }

        /**
         * Registers a rule under {@code order}. Pending comments are stored in the XML rule map
         * at ids just below {@code order} (spaced 0.0001 apart) so they sort directly before it.
         *
         * @param order rule id
         * @param before unresolved pattern for the text preceding the position
         * @param result break verdict passed to the {@link Rule}
         * @param after unresolved pattern for the text following the position
         * @param line rule text used for display and error messages
         * @return this builder
         * @throws IllegalArgumentException when {@code line} matches the broken-identifier pattern
         */
        Builder addRule(Double order, String before, byte result, String after, String line) {
            if (this.brokenIdentifierMatcher.reset(line).find()) {
                int badAt = this.brokenIdentifierMatcher.start();
                throw new IllegalArgumentException("Illegal identifier at:" + line.substring(0, badAt) + "<<>>" + line.substring(badAt));
            }
            String normalized = this.whiteSpace.reset(line).replaceAll(" ");
            if (!this.lastComments.isEmpty()) {
                final double step = 1.0E-4;
                double slot = order - step * (double)this.lastComments.size();
                for (Object comment : this.lastComments) {
                    Double slotKey = Double.valueOf(slot);
                    if (this.xmlRules.containsKey(slotKey)) {
                        System.out.println("WARNING: Overriding rule " + slotKey);
                    }
                    this.xmlRules.put(slotKey, comment);
                    slot += step;
                }
                this.lastComments.clear();
            }
            String htmlText = TransliteratorUtilities.toHTML.transliterate(normalized);
            this.htmlRules.put(order, htmlText);
            String xmlElement = "<rule id=\"" + nf.format((Object)order) + "\"> " + TransliteratorUtilities.toXML.transliterate(normalized) + " </rule>";
            this.xmlRules.put(order, xmlElement);
            String resolvedBefore = this.replaceVariables(before);
            String resolvedAfter = this.replaceVariables(after);
            this.rules.put(order, new Rule(resolvedBefore, result, resolvedAfter, normalized));
            return this;
        }

        /**
         * Builds a {@link Segmenter} holding every rule added so far, in ascending id order,
         * and sharing this builder's sample map.
         *
         * @return the new segmenter
         */
        public Segmenter make() {
            Segmenter segmenter = new Segmenter();
            for (Object entryObject : this.rules.entrySet()) {
                Map.Entry entry = (Map.Entry)entryObject;
                segmenter.add((Double)entry.getKey(), (Rule)entry.getValue());
            }
            segmenter.samples = this.samples;
            return segmenter;
        }

        /**
         * Expands {@code input} into plain Java regex text in two passes:
         * first every {@code $name} reference is replaced by the stored value of that variable,
         * then every UnicodeSet pattern is replaced by an equivalent bracket expression
         * produced by {@code getInsertablePattern}.
         *
         * @param input text that may contain variable references and UnicodeSet patterns
         * @return the expanded text
         * @throws IllegalArgumentException when a {@code $identifier} names no known variable
         */
        private String replaceVariables(String input) {
            String result = input;
            int position = -1;
            while ((position = result.indexOf('$', position)) >= 0) {
                // Keys iterate longest-first, so a longer name wins over a name that is its prefix.
                String matchedName = null;
                Iterator it = this.variables.keySet().iterator();
                while (it.hasNext()) {
                    String name = (String)it.next();
                    if (result.regionMatches(position, name, 0, name.length())) {
                        matchedName = name;
                        break;
                    }
                }
                if (matchedName != null) {
                    String value = (String)this.variables.get(matchedName);
                    result = result.substring(0, position) + value + result.substring(position + matchedName.length());
                    // Skip the inserted text so it is not scanned again.
                    position += value.length();
                    continue;
                }
                if (this.identifierMatcher.reset(result.substring(position)).lookingAt()) {
                    throw new IllegalArgumentException("Illegal variable at: '" + result.substring(position) + "'");
                }
                // A '$' that is not a variable reference: step past it. Without this the next
                // indexOf would return the same position and the loop would never end.
                ++position;
            }
            for (int i = 0; i < result.length(); ++i) {
                if (!UnicodeSet.resemblesPattern((String)result, (int)i)) continue;
                this.parsePosition.setIndex(i);
                UnicodeSet temp = new UnicodeSet(result, this.parsePosition, (SymbolTable)this.symbolTable);
                String insert = this.getInsertablePattern(temp);
                result = result.substring(0, i) + insert + result.substring(this.parsePosition.getIndex());
                // Continue right after the inserted bracket expression.
                i += insert.length() - 1;
            }
            return result;
        }

        /**
         * Converts a set into a bracket expression that can be pasted into a Java regex.
         * The argument is modified: code points U+10000..U+10FFFF are removed, and when
         * {@code DEBUG_RETAIN} is set the set is intersected with it.
         *
         * @param temp set to convert (mutated)
         * @return the bracket expression
         * @throws IllegalArgumentException when re-parsing the expression does not give back the set
         */
        private String getInsertablePattern(UnicodeSet temp) {
            // Complementing twice leaves membership unchanged; NOTE(review): presumably done to
            // normalize the set's internal state - confirm against UnicodeSet.
            temp.complement().complement();
            temp.remove(0x10000, 0x10FFFF);
            if (DEBUG_RETAIN != null) {
                temp.retainAll(DEBUG_RETAIN);
                if (temp.size() == 0) {
                    // Keep the set non-empty by adding U+FFFF.
                    temp.add(0xFFFF);
                }
            }
            String pattern = Builder.toPattern(temp, JavaRegexShower);
            // Round-trip check: the printed form must parse back to the same set.
            UnicodeSet reparsed = new UnicodeSet(pattern);
            if (reparsed.equals((Object)temp)) {
                return pattern;
            }
            throw new IllegalArgumentException("Failure on UnicodeSet print");
        }

        /**
         * Prints the ranges of a set as one bracket expression. A single code point is printed
         * once, a two-element range as both code points, and a longer range as
         * {@code first-last}.
         *
         * @param temp set to print
         * @param shower renders each individual code point
         * @return the bracket expression, {@code []} for an empty set
         */
        private static String toPattern(UnicodeSet temp, CodePointShower shower) {
            StringBuilder pattern = new StringBuilder("[");
            UnicodeSetIterator ranges = new UnicodeSetIterator(temp);
            while (ranges.nextRange()) {
                int start = ranges.codepoint;
                pattern.append(shower.show(start));
                int end = ranges.codepointEnd;
                if (start == end) {
                    continue;
                }
                // Adjacent pairs need no hyphen; three or more elements do.
                if (end - start > 1) {
                    pattern.append('-');
                }
                pattern.append(shower.show(end));
            }
            return pattern.append(']').toString();
        }

        /**
         * @return a read-only view of the variable name to resolved value map
         */
        public Map getVariables() {
            Map readOnlyView = Collections.unmodifiableMap(this.variables);
            return readOnlyView;
        }

        /**
         * Lists the HTML form of every rule as {@code <id>)<TAB><text>}, in ascending id order.
         *
         * @return a new list of strings
         */
        public List getRules() {
            List<String> lines = new ArrayList<String>();
            for (Object entryObject : this.htmlRules.entrySet()) {
                Map.Entry entry = (Map.Entry)entryObject;
                lines.add(entry.getKey() + ")\t" + entry.getValue());
            }
            return lines;
        }

        /**
         * Merges two sample labels: a missing side yields the other side, equal labels stay
         * as they are, and different labels are joined as {@code a_b}.
         */
        static class MyComposer
        implements UnicodeMap.Composer {
            MyComposer() {
            }

            public Object compose(int codePoint, Object a, Object b) {
                if (a == null) {
                    return b;
                }
                boolean nothingToAdd = b == null || a.equals(b);
                return nothingToAdd ? a : a + "_" + b;
            }
        }

        /**
         * Symbol table that resolves property expressions through the enclosing builder's
         * property factory.
         */
        private class MyXSymbolTable
        extends UnicodeSet.XSymbolTable {
            private MyXSymbolTable() {
            }

            /**
             * Fills {@code result} with the code points having the given property value.
             *
             * @return {@code false} when the property is unknown or the resulting set is empty
             */
            public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) {
                UnicodeProperty property = Builder.this.propFactory.getProperty(propertyName);
                if (property != null) {
                    result.clear();
                    UnicodeSet filled = property.getSet(propertyValue, result);
                    return filled.size() != 0;
                }
                return false;
            }
        }
    }

    /**
     * One segmentation rule: a regex for the text before a position, a regex for the text
     * after it, and the verdict (break or no break) that applies when both match.
     */
    public static class Rule {
        public static final byte NO_BREAK = -1;
        public static final byte UNKNOWN_BREAK = 0;
        public static final byte BREAK = 1;
        // Matches the whole text before the position; built as ".*(" + before + ")".
        private Matcher matchPrevious;
        // Matches a prefix of the text after the position.
        private Matcher matchSucceeding;
        // Rule text as given, used for display.
        private String name;
        // Escaped form of the two resolved patterns joined by the break operator.
        private String resolved;
        private byte breaks;

        /**
         * @param before resolved regex for the text preceding the position
         * @param result verdict returned when the rule matches ({@link #BREAK} or {@link #NO_BREAK})
         * @param after resolved regex for the text following the position
         * @param line original rule text, used for display and error messages
         * @throws IllegalArgumentException when either regex fails to compile; the message marks
         *         the error position with {@code <<<>>>} when it is known
         */
        public Rule(String before, byte result, String after, String line) {
            this.breaks = result;
            before = ".*(" + before + ")";
            String parsing = null;
            try {
                parsing = before;
                this.matchPrevious = Pattern.compile(parsing, 44).matcher("");
                parsing = after;
                this.matchSucceeding = Pattern.compile(parsing, 44).matcher("");
            }
            catch (PatternSyntaxException e) {
                // getIndex() is -1 when the position is unknown; substring() would then throw
                // from inside this handler and hide the real error.
                int index = e.getIndex();
                String marked = (index < 0 || index > parsing.length())
                        ? parsing
                        : parsing.substring(0, index) + "<<<>>>" + parsing.substring(index);
                throw (RuntimeException)new IllegalArgumentException("On <" + line + ">, Can't parse: " + marked).initCause(e);
            }
            catch (RuntimeException e) {
                throw (RuntimeException)new IllegalArgumentException("On <" + line + ">, Can't parse: " + parsing).initCause(e);
            }
            this.name = line;
            this.resolved = Utility.escape((String)before) + (result == NO_BREAK ? " \u00d7 " : " \u00f7 ") + Utility.escape((String)after);
        }

        /**
         * Applies the rule at one position. The following text is checked first.
         *
         * @param text full text
         * @param position offset to test
         * @return {@link #UNKNOWN_BREAK} when the rule does not apply, otherwise its verdict
         */
        public byte matches(CharSequence text, int position) {
            if (!Segmenter.matchAfter(this.matchSucceeding, text, position)) {
                return UNKNOWN_BREAK;
            }
            if (!Segmenter.matchBefore(this.matchPrevious, text, position)) {
                return UNKNOWN_BREAK;
            }
            return this.breaks;
        }

        /**
         * @return the rule text without the resolved form
         */
        public String toString() {
            return this.toString(false);
        }

        /**
         * @param showResolved when {@code true}, appends {@code ": "} and the resolved form
         * @return the rule text, optionally followed by its resolved form
         */
        public String toString(boolean showResolved) {
            String result = this.name;
            if (showResolved) {
                result = result + ": " + this.resolved;
            }
            return result;
        }
    }

    /**
     * Strategy for rendering a single code point as text.
     */
    public static interface CodePointShower {
        /**
         * @param codePoint the code point to render
         * @return its textual form
         */
        public String show(int codePoint);
    }
}

