Categories:
Audio (13)
Biotech (29)
Bytecode (36)
Database (77)
Framework (7)
Game (7)
General (507)
Graphics (53)
I/O (35)
IDE (2)
JAR Tools (101)
JavaBeans (21)
JDBC (121)
JDK (426)
JSP (20)
Logging (108)
Mail (58)
Messaging (8)
Network (84)
PDF (97)
Report (7)
Scripting (84)
Security (32)
Server (121)
Servlet (26)
SOAP (24)
Testing (54)
Web (15)
XML (309)
Collections:
Other Resources:
JRE 8 rt.jar - com.* Package Source Code
JRE 8 rt.jar is the JAR file for JRE 8 RT (Runtime) libraries. JRE (Java Runtime) 8 is the runtime environment included in JDK 8. JRE 8 rt.jar libraries are divided into 6 packages:
com.* - Internal Oracle and Sun Microsystems libraries java.* - Standard Java API libraries. javax.* - Extended Java API libraries. jdk.* - JDK supporting libraries. org.* - Third party libraries. sun.* - Old libraries developed by Sun Microsystems.
JAR File Information:
Directory of C:\fyicenter\jdk-1.8.0_191\jre\lib 63,596,151 rt.jar
Here is the list of Java classes of the com.* package in JRE 1.8.0_191 rt.jar. Java source codes are also provided.
✍: FYIcenter
⏎ com/sun/org/apache/regexp/internal/RECompiler.java
/* * Copyright (c) 2007, 2018, Oracle and/or its affiliates. All rights reserved. * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. */ /* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.sun.org.apache.regexp.internal; import com.sun.org.apache.regexp.internal.RE; import java.util.Hashtable; /** * A regular expression compiler class. This class compiles a pattern string into a * regular expression program interpretable by the RE evaluator class. The 'recompile' * command line tool uses this compiler to pre-compile regular expressions for use * with RE. For a description of the syntax accepted by RECompiler and what you can * do with regular expressions, see the documentation for the RE matcher class. * * @see RE * @see recompile * * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a> * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a> */ public class RECompiler { // The compiled program char[] instruction; // The compiled RE 'program' instruction buffer int lenInstruction; // The amount of the program buffer currently in use // Input state for compiling regular expression String pattern; // Input string int len; // Length of the pattern string int idx; // Current input index into ac int parens; // Total number of paren pairs // Node flags static final int NODE_NORMAL = 0; // No flags (nothing special) static final int NODE_NULLABLE = 1; // True if node is potentially null static final int NODE_TOPLEVEL = 2; // True if top level expr // Special types of 'escapes' static final int ESC_MASK = 0xffff0; // Escape complexity mask static final int ESC_BACKREF = 0xfffff; // Escape is really a backreference static final int ESC_COMPLEX = 0xffffe; // Escape isn't really a true character static final int ESC_CLASS = 0xffffd; // Escape represents a whole class of characters // {m,n} stacks int maxBrackets = 10; // Maximum number of bracket pairs static final int bracketUnbounded = -1; // Unbounded value int brackets = 0; // Number of bracket sets int[] bracketStart = null; // Starting point int[] bracketEnd = null; // Ending point int[] bracketMin = null; // Minimum number of matches int[] bracketOpt = null; // Additional optional matches // Lookup table for POSIX character class names static Hashtable hashPOSIX = new Hashtable(); static { hashPOSIX.put("alnum", new Character(RE.POSIX_CLASS_ALNUM)); hashPOSIX.put("alpha", new Character(RE.POSIX_CLASS_ALPHA)); hashPOSIX.put("blank", new Character(RE.POSIX_CLASS_BLANK)); hashPOSIX.put("cntrl", new Character(RE.POSIX_CLASS_CNTRL)); hashPOSIX.put("digit", new Character(RE.POSIX_CLASS_DIGIT)); hashPOSIX.put("graph", new Character(RE.POSIX_CLASS_GRAPH)); hashPOSIX.put("lower", new Character(RE.POSIX_CLASS_LOWER)); hashPOSIX.put("print", new Character(RE.POSIX_CLASS_PRINT)); hashPOSIX.put("punct", new Character(RE.POSIX_CLASS_PUNCT)); hashPOSIX.put("space", new Character(RE.POSIX_CLASS_SPACE)); hashPOSIX.put("upper", new Character(RE.POSIX_CLASS_UPPER)); hashPOSIX.put("xdigit", new Character(RE.POSIX_CLASS_XDIGIT)); hashPOSIX.put("javastart", new Character(RE.POSIX_CLASS_JSTART)); hashPOSIX.put("javapart", new Character(RE.POSIX_CLASS_JPART)); } /** * Constructor. Creates (initially empty) storage for a regular expression program. */ public RECompiler() { // Start off with a generous, yet reasonable, initial size instruction = new char[128]; lenInstruction = 0; } /** * Ensures that n more characters can fit in the program buffer. * If n more can't fit, then the size is doubled until it can. * @param n Number of additional characters to ensure will fit. */ void ensure(int n) { // Get current program length int curlen = instruction.length; // If the current length + n more is too much if (lenInstruction + n >= curlen) { // Double the size of the program array until n more will fit while (lenInstruction + n >= curlen) { curlen *= 2; } // Allocate new program array and move data into it char[] newInstruction = new char[curlen]; System.arraycopy(instruction, 0, newInstruction, 0, lenInstruction); instruction = newInstruction; } } /** * Emit a single character into the program stream. * @param c Character to add */ void emit(char c) { // Make room for character ensure(1); // Add character instruction[lenInstruction++] = c; } /** * Inserts a node with a given opcode and opdata at insertAt. The node relative next * pointer is initialized to 0. * @param opcode Opcode for new node * @param opdata Opdata for new node (only the low 16 bits are currently used) * @param insertAt Index at which to insert the new node in the program */ void nodeInsert(char opcode, int opdata, int insertAt) { // Make room for a new node ensure(RE.nodeSize); // Move everything from insertAt to the end down nodeSize elements System.arraycopy(instruction, insertAt, instruction, insertAt + RE.nodeSize, lenInstruction - insertAt); instruction[insertAt + RE.offsetOpcode] = opcode; instruction[insertAt + RE.offsetOpdata] = (char)opdata; instruction[insertAt + RE.offsetNext] = 0; lenInstruction += RE.nodeSize; } /** * Appends a node to the end of a node chain * @param node Start of node chain to traverse * @param pointTo Node to have the tail of the chain point to */ void setNextOfEnd(int node, int pointTo) { // Traverse the chain until the next offset is 0 int next = instruction[node + RE.offsetNext]; // while the 'node' is not the last in the chain // and the 'node' is not the last in the program. while ( next != 0 && node < lenInstruction ) { // if the node we are supposed to point to is in the chain then // point to the end of the program instead. // Michael McCallum <gholam@xtra.co.nz> // FIXME: // This is a _hack_ to stop infinite programs. // I believe that the implementation of the reluctant matches is wrong but // have not worked out a better way yet. if ( node == pointTo ) { pointTo = lenInstruction; } node += next; next = instruction[node + RE.offsetNext]; } // if we have reached the end of the program then dont set the pointTo. // im not sure if this will break any thing but passes all the tests. if ( node < lenInstruction ) { // Point the last node in the chain to pointTo. instruction[node + RE.offsetNext] = (char)(short)(pointTo - node); } } /** * Adds a new node * @param opcode Opcode for node * @param opdata Opdata for node (only the low 16 bits are currently used) * @return Index of new node in program */ int node(char opcode, int opdata) { // Make room for a new node ensure(RE.nodeSize); // Add new node at end instruction[lenInstruction + RE.offsetOpcode] = opcode; instruction[lenInstruction + RE.offsetOpdata] = (char)opdata; instruction[lenInstruction + RE.offsetNext] = 0; lenInstruction += RE.nodeSize; // Return index of new node return lenInstruction - RE.nodeSize; } /** * Throws a new internal error exception * @exception Error Thrown in the event of an internal error. */ void internalError() throws Error { throw new Error("Internal error!"); } /** * Throws a new syntax error exception * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ void syntaxError(String s) throws RESyntaxException { throw new RESyntaxException(s); } /** * Allocate storage for brackets only as needed */ void allocBrackets() { // Allocate bracket stacks if not already done if (bracketStart == null) { // Allocate storage bracketStart = new int[maxBrackets]; bracketEnd = new int[maxBrackets]; bracketMin = new int[maxBrackets]; bracketOpt = new int[maxBrackets]; // Initialize to invalid values for (int i = 0; i < maxBrackets; i++) { bracketStart[i] = bracketEnd[i] = bracketMin[i] = bracketOpt[i] = -1; } } } /** Enlarge storage for brackets only as needed. */ synchronized void reallocBrackets() { // trick the tricky if (bracketStart == null) { allocBrackets(); } int new_size = maxBrackets * 2; int[] new_bS = new int[new_size]; int[] new_bE = new int[new_size]; int[] new_bM = new int[new_size]; int[] new_bO = new int[new_size]; // Initialize to invalid values for (int i=brackets; i<new_size; i++) { new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1; } System.arraycopy(bracketStart,0, new_bS,0, brackets); System.arraycopy(bracketEnd,0, new_bE,0, brackets); System.arraycopy(bracketMin,0, new_bM,0, brackets); System.arraycopy(bracketOpt,0, new_bO,0, brackets); bracketStart = new_bS; bracketEnd = new_bE; bracketMin = new_bM; bracketOpt = new_bO; maxBrackets = new_size; } /** * Match bracket {m,n} expression put results in bracket member variables * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ void bracket() throws RESyntaxException { // Current character must be a '{' if (idx >= len || pattern.charAt(idx++) != '{') { internalError(); } // Next char must be a digit if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { syntaxError("Expected digit"); } // Get min ('m' of {m,n}) number StringBuffer number = new StringBuffer(); while (idx < len && Character.isDigit(pattern.charAt(idx))) { number.append(pattern.charAt(idx++)); } try { bracketMin[brackets] = Integer.parseInt(number.toString()); } catch (NumberFormatException e) { syntaxError("Expected valid number"); } // If out of input, fail if (idx >= len) { syntaxError("Expected comma or right bracket"); } // If end of expr, optional limit is 0 if (pattern.charAt(idx) == '}') { idx++; bracketOpt[brackets] = 0; return; } // Must have at least {m,} and maybe {m,n}. if (idx >= len || pattern.charAt(idx++) != ',') { syntaxError("Expected comma"); } // If out of input, fail if (idx >= len) { syntaxError("Expected comma or right bracket"); } // If {m,} max is unlimited if (pattern.charAt(idx) == '}') { idx++; bracketOpt[brackets] = bracketUnbounded; return; } // Next char must be a digit if (idx >= len || !Character.isDigit(pattern.charAt(idx))) { syntaxError("Expected digit"); } // Get max number number.setLength(0); while (idx < len && Character.isDigit(pattern.charAt(idx))) { number.append(pattern.charAt(idx++)); } try { bracketOpt[brackets] = Integer.parseInt(number.toString()) - bracketMin[brackets]; } catch (NumberFormatException e) { syntaxError("Expected valid number"); } // Optional repetitions must be >= 0 if (bracketOpt[brackets] < 0) { syntaxError("Bad range"); } // Must have close brace if (idx >= len || pattern.charAt(idx++) != '}') { syntaxError("Missing close brace"); } } /** * Match an escape sequence. Handles quoted chars and octal escapes as well * as normal escape characters. Always advances the input stream by the * right amount. This code "understands" the subtle difference between an * octal escape and a backref. You can access the type of ESC_CLASS or * ESC_COMPLEX or ESC_BACKREF by looking at pattern[idx - 1]. * @return ESC_* code or character if simple escape * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int escape() throws RESyntaxException { // "Shouldn't" happen if (pattern.charAt(idx) != '\\') { internalError(); } // Escape shouldn't occur as last character in string! if (idx + 1 == len) { syntaxError("Escape terminates string"); } // Switch on character after backslash idx += 2; char escapeChar = pattern.charAt(idx - 1); switch (escapeChar) { case RE.E_BOUND: case RE.E_NBOUND: return ESC_COMPLEX; case RE.E_ALNUM: case RE.E_NALNUM: case RE.E_SPACE: case RE.E_NSPACE: case RE.E_DIGIT: case RE.E_NDIGIT: return ESC_CLASS; case 'u': case 'x': { // Exact required hex digits for escape type int hexDigits = (escapeChar == 'u' ? 4 : 2); // Parse up to hexDigits characters from input int val = 0; for ( ; idx < len && hexDigits-- > 0; idx++) { // Get char char c = pattern.charAt(idx); // If it's a hexadecimal digit (0-9) if (c >= '0' && c <= '9') { // Compute new value val = (val << 4) + c - '0'; } else { // If it's a hexadecimal letter (a-f) c = Character.toLowerCase(c); if (c >= 'a' && c <= 'f') { // Compute new value val = (val << 4) + (c - 'a') + 10; } else { // If it's not a valid digit or hex letter, the escape must be invalid // because hexDigits of input have not been absorbed yet. syntaxError("Expected " + hexDigits + " hexadecimal digits after \\" + escapeChar); } } } return val; } case 't': return '\t'; case 'n': return '\n'; case 'r': return '\r'; case 'f': return '\f'; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': // An octal escape starts with a 0 or has two digits in a row if ((idx < len && Character.isDigit(pattern.charAt(idx))) || escapeChar == '0') { // Handle \nnn octal escapes int val = escapeChar - '0'; if (idx < len && Character.isDigit(pattern.charAt(idx))) { val = ((val << 3) + (pattern.charAt(idx++) - '0')); if (idx < len && Character.isDigit(pattern.charAt(idx))) { val = ((val << 3) + (pattern.charAt(idx++) - '0')); } } return val; } // It's actually a backreference (\[1-9]), not an escape return ESC_BACKREF; default: // Simple quoting of a character return escapeChar; } } /** * Compile a character class * @return Index of class node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int characterClass() throws RESyntaxException { // Check for bad calling or empty class if (pattern.charAt(idx) != '[') { internalError(); } // Check for unterminated or empty class if ((idx + 1) >= len || pattern.charAt(++idx) == ']') { syntaxError("Empty or unterminated class"); } // Check for POSIX character class if (idx < len && pattern.charAt(idx) == ':') { // Skip colon idx++; // POSIX character classes are denoted with lowercase ASCII strings int idxStart = idx; while (idx < len && pattern.charAt(idx) >= 'a' && pattern.charAt(idx) <= 'z') { idx++; } // Should be a ":]" to terminate the POSIX character class if ((idx + 1) < len && pattern.charAt(idx) == ':' && pattern.charAt(idx + 1) == ']') { // Get character class String charClass = pattern.substring(idxStart, idx); // Select the POSIX class id Character i = (Character)hashPOSIX.get(charClass); if (i != null) { // Move past colon and right bracket idx += 2; // Return new POSIX character class node return node(RE.OP_POSIXCLASS, i.charValue()); } syntaxError("Invalid POSIX character class '" + charClass + "'"); } syntaxError("Invalid POSIX character class syntax"); } // Try to build a class. Create OP_ANYOF node int ret = node(RE.OP_ANYOF, 0); // Parse class declaration char CHAR_INVALID = Character.MAX_VALUE; char last = CHAR_INVALID; char simpleChar = 0; boolean include = true; boolean definingRange = false; int idxFirst = idx; char rangeStart = Character.MIN_VALUE; char rangeEnd; RERange range = new RERange(); while (idx < len && pattern.charAt(idx) != ']') { switchOnCharacter: // Switch on character switch (pattern.charAt(idx)) { case '^': include = !include; if (idx == idxFirst) { range.include(Character.MIN_VALUE, Character.MAX_VALUE, true); } idx++; continue; case '\\': { // Escape always advances the stream int c; switch (c = escape ()) { case ESC_COMPLEX: case ESC_BACKREF: // Word boundaries and backrefs not allowed in a character class! syntaxError("Bad character class"); case ESC_CLASS: // Classes can't be an endpoint of a range if (definingRange) { syntaxError("Bad character class"); } // Handle specific type of class (some are ok) switch (pattern.charAt(idx - 1)) { case RE.E_NSPACE: case RE.E_NDIGIT: case RE.E_NALNUM: syntaxError("Bad character class"); case RE.E_SPACE: range.include('\t', include); range.include('\r', include); range.include('\f', include); range.include('\n', include); range.include('\b', include); range.include(' ', include); break; case RE.E_ALNUM: range.include('a', 'z', include); range.include('A', 'Z', include); range.include('_', include); // Fall through! case RE.E_DIGIT: range.include('0', '9', include); break; } // Make last char invalid (can't be a range start) last = CHAR_INVALID; break; default: // Escape is simple so treat as a simple char simpleChar = (char) c; break switchOnCharacter; } } continue; case '-': // Start a range if one isn't already started if (definingRange) { syntaxError("Bad class range"); } definingRange = true; // If no last character, start of range is 0 rangeStart = (last == CHAR_INVALID ? 0 : last); // Premature end of range. define up to Character.MAX_VALUE if ((idx + 1) < len && pattern.charAt(++idx) == ']') { simpleChar = Character.MAX_VALUE; break; } continue; default: simpleChar = pattern.charAt(idx++); break; } // Handle simple character simpleChar if (definingRange) { // if we are defining a range make it now rangeEnd = simpleChar; // Actually create a range if the range is ok if (rangeStart >= rangeEnd) { syntaxError("Bad character class"); } range.include(rangeStart, rangeEnd, include); // We are done defining the range last = CHAR_INVALID; definingRange = false; } else { // If simple character and not start of range, include it if (idx >= len || pattern.charAt(idx) != '-') { range.include(simpleChar, include); } last = simpleChar; } } // Shouldn't be out of input if (idx == len) { syntaxError("Unterminated character class"); } // Absorb the ']' end of class marker idx++; // Emit character class definition instruction[ret + RE.offsetOpdata] = (char)range.num; for (int i = 0; i < range.num; i++) { emit((char)range.minRange[i]); emit((char)range.maxRange[i]); } return ret; } /** * Absorb an atomic character string. This method is a little tricky because * it can un-include the last character of string if a closure operator follows. * This is correct because *+? have higher precedence than concatentation (thus * ABC* means AB(C*) and NOT (ABC)*). * @return Index of new atom node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int atom() throws RESyntaxException { // Create a string node int ret = node(RE.OP_ATOM, 0); // Length of atom int lenAtom = 0; // Loop while we've got input atomLoop: while (idx < len) { // Is there a next char? if ((idx + 1) < len) { char c = pattern.charAt(idx + 1); // If the next 'char' is an escape, look past the whole escape if (pattern.charAt(idx) == '\\') { int idxEscape = idx; escape(); if (idx < len) { c = pattern.charAt(idx); } idx = idxEscape; } // Switch on next char switch (c) { case '{': case '?': case '*': case '+': // If the next character is a closure operator and our atom is non-empty, the // current character should bind to the closure operator rather than the atom if (lenAtom != 0) { break atomLoop; } } } // Switch on current char switch (pattern.charAt(idx)) { case ']': case '^': case '$': case '.': case '[': case '(': case ')': case '|': break atomLoop; case '{': case '?': case '*': case '+': // We should have an atom by now if (lenAtom == 0) { // No atom before closure syntaxError("Missing operand to closure"); } break atomLoop; case '\\': { // Get the escaped character (advances input automatically) int idxBeforeEscape = idx; int c = escape(); // Check if it's a simple escape (as opposed to, say, a backreference) if ((c & ESC_MASK) == ESC_MASK) { // Not a simple escape, so backup to where we were before the escape. idx = idxBeforeEscape; break atomLoop; } // Add escaped char to atom emit((char) c); lenAtom++; } break; default: // Add normal character to atom emit(pattern.charAt(idx++)); lenAtom++; break; } } // This "shouldn't" happen if (lenAtom == 0) { internalError(); } // Emit the atom length into the program instruction[ret + RE.offsetOpdata] = (char)lenAtom; return ret; } /** * Match a terminal node. * @param flags Flags * @return Index of terminal node (closeable) * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int terminal(int[] flags) throws RESyntaxException { switch (pattern.charAt(idx)) { case RE.OP_EOL: case RE.OP_BOL: case RE.OP_ANY: return node(pattern.charAt(idx++), 0); case '[': return characterClass(); case '(': return expr(flags); case ')': syntaxError("Unexpected close paren"); case '|': internalError(); case ']': syntaxError("Mismatched class"); case 0: syntaxError("Unexpected end of input"); case '?': case '+': case '{': case '*': syntaxError("Missing operand to closure"); case '\\': { // Don't forget, escape() advances the input stream! int idxBeforeEscape = idx; // Switch on escaped character switch (escape()) { case ESC_CLASS: case ESC_COMPLEX: flags[0] &= ~NODE_NULLABLE; return node(RE.OP_ESCAPE, pattern.charAt(idx - 1)); case ESC_BACKREF: { char backreference = (char)(pattern.charAt(idx - 1) - '0'); if (parens <= backreference) { syntaxError("Bad backreference"); } flags[0] |= NODE_NULLABLE; return node(RE.OP_BACKREF, backreference); } default: // We had a simple escape and we want to have it end up in // an atom, so we back up and fall though to the default handling idx = idxBeforeEscape; flags[0] &= ~NODE_NULLABLE; break; } } } // Everything above either fails or returns. // If it wasn't one of the above, it must be the start of an atom. flags[0] &= ~NODE_NULLABLE; return atom(); } /** * Compile a possibly closured terminal * @param flags Flags passed by reference * @return Index of closured node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int closure(int[] flags) throws RESyntaxException { // Before terminal int idxBeforeTerminal = idx; // Values to pass by reference to terminal() int[] terminalFlags = { NODE_NORMAL }; // Get terminal symbol int ret = terminal(terminalFlags); // Or in flags from terminal symbol flags[0] |= terminalFlags[0]; // Advance input, set NODE_NULLABLE flag and do sanity checks if (idx >= len) { return ret; } boolean greedy = true; char closureType = pattern.charAt(idx); switch (closureType) { case '?': case '*': // The current node can be null flags[0] |= NODE_NULLABLE; case '+': // Eat closure character idx++; case '{': // Don't allow blantant stupidity int opcode = instruction[ret + RE.offsetOpcode]; if (opcode == RE.OP_BOL || opcode == RE.OP_EOL) { syntaxError("Bad closure operand"); } if ((terminalFlags[0] & NODE_NULLABLE) != 0) { syntaxError("Closure operand can't be nullable"); } break; } // If the next character is a '?', make the closure non-greedy (reluctant) if (idx < len && pattern.charAt(idx) == '?') { idx++; greedy = false; } if (greedy) { // Actually do the closure now switch (closureType) { case '{': { // We look for our bracket in the list boolean found = false; int i; allocBrackets(); for (i = 0; i < brackets; i++) { if (bracketStart[i] == idx) { found = true; break; } } // If its not in the list we parse the {m,n} if (!found) { if (brackets >= maxBrackets) { reallocBrackets(); } bracketStart[brackets] = idx; bracket(); bracketEnd[brackets] = idx; i = brackets++; } // Process min first if (bracketMin[i]-- > 0) { if (bracketMin[i] > 0 || bracketOpt[i] != 0) { // Rewind stream and run it through again - more matchers coming for (int j = 0; j < brackets; j++) { if (j != i && bracketStart[j] < idx && bracketStart[j] >= idxBeforeTerminal) { brackets--; bracketStart[j] = bracketStart[brackets]; bracketEnd[j] = bracketEnd[brackets]; bracketMin[j] = bracketMin[brackets]; bracketOpt[j] = bracketOpt[brackets]; } } idx = idxBeforeTerminal; } else { // Bug #1030: No optinal matches - no need to rewind idx = bracketEnd[i]; } break; } // Do the right thing for maximum ({m,}) if (bracketOpt[i] == bracketUnbounded) { // Drop through now and closure expression. // We are done with the {m,} expr, so skip rest closureType = '*'; bracketOpt[i] = 0; idx = bracketEnd[i]; } else if (bracketOpt[i]-- > 0) { if (bracketOpt[i] > 0) { // More optional matchers - 'play it again sam!' idx = idxBeforeTerminal; } else { // Bug #1030: We are done - this one is last and optional idx = bracketEnd[i]; } // Drop through to optionally close closureType = '?'; } else { // Rollback terminal - neither min nor opt matchers present lenInstruction = ret; node(RE.OP_NOTHING, 0); // We are done. skip the rest of {m,n} expr idx = bracketEnd[i]; break; } } // Fall through! case '?': case '*': if (!greedy) { break; } if (closureType == '?') { // X? is compiled as (X|) nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); // inserted branch to option int nothing = node (RE.OP_NOTHING, 0); // which is OP_NOTHING setNextOfEnd(ret, nothing); // point (second) branch to OP_NOTHING setNextOfEnd(ret + RE.nodeSize, nothing); // point the end of X to OP_NOTHING node } if (closureType == '*') { // X* is compiled as (X{gotoX}|) nodeInsert(RE.OP_BRANCH, 0, ret); // branch before X setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 0)); // end of X points to an option setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0)); // to goto setNextOfEnd(ret + RE.nodeSize, ret); // the start again setNextOfEnd(ret, node(RE.OP_BRANCH, 0)); // the other option is setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // OP_NOTHING } break; case '+': { // X+ is compiled as X({gotoX}|) int branch; branch = node(RE.OP_BRANCH, 0); // a new branch setNextOfEnd(ret, branch); // is added to the end of X setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one option is to go back to the start setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the other option setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is OP_NOTHING } break; } } else { // Add end after closured subexpr setNextOfEnd(ret, node(RE.OP_END, 0)); // Actually do the closure now switch (closureType) { case '?': nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); break; case '*': nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); break; case '+': nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret); break; } // Point to the expr after the closure setNextOfEnd(ret, lenInstruction); } return ret; } /** * Compile one branch of an or operator (implements concatenation) * @param flags Flags passed by reference * @return Pointer to branch node * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int branch(int[] flags) throws RESyntaxException { // Get each possibly closured piece and concat int node; int ret = node(RE.OP_BRANCH, 0); int chain = -1; int[] closureFlags = new int[1]; boolean nullable = true; while (idx < len && pattern.charAt(idx) != '|' && pattern.charAt(idx) != ')') { // Get new node closureFlags[0] = NODE_NORMAL; node = closure(closureFlags); if (closureFlags[0] == NODE_NORMAL) { nullable = false; } // If there's a chain, append to the end if (chain != -1) { setNextOfEnd(chain, node); } // Chain starts at current chain = node; } // If we don't run loop, make a nothing node if (chain == -1) { node(RE.OP_NOTHING, 0); } // Set nullable flag for this branch if (nullable) { flags[0] |= NODE_NULLABLE; } return ret; } /** * Compile an expression with possible parens around it. Paren matching * is done at this level so we can tie the branch tails together. * @param flags Flag value passed by reference * @return Node index of expression in instruction array * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int expr(int[] flags) throws RESyntaxException { // Create open paren node unless we were called from the top level (which has no parens) int paren = -1; int ret = -1; int closeParens = parens; if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(') { // if its a cluster ( rather than a proper subexpression ie with backrefs ) if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' ) { paren = 2; idx += 3; ret = node( RE.OP_OPEN_CLUSTER, 0 ); } else { paren = 1; idx++; ret = node(RE.OP_OPEN, parens++); } } flags[0] &= ~NODE_TOPLEVEL; // Create a branch node int branch = branch(flags); if (ret == -1) { ret = branch; } else { setNextOfEnd(ret, branch); } // Loop through branches while (idx < len && pattern.charAt(idx) == '|') { idx++; branch = branch(flags); setNextOfEnd(ret, branch); } // Create an ending node (either a close paren or an OP_END) int end; if ( paren > 0 ) { if (idx < len && pattern.charAt(idx) == ')') { idx++; } else { syntaxError("Missing close paren"); } if ( paren == 1 ) { end = node(RE.OP_CLOSE, closeParens); } else { end = node( RE.OP_CLOSE_CLUSTER, 0 ); } } else { end = node(RE.OP_END, 0); } // Append the ending node to the ret nodelist setNextOfEnd(ret, end); // Hook the ends of each branch to the end node int currentNode = ret; int nextNodeOffset = instruction[ currentNode + RE.offsetNext ]; // while the next node o while ( nextNodeOffset != 0 && currentNode < lenInstruction ) { // If branch, make the end of the branch's operand chain point to the end node. if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH ) { setNextOfEnd( currentNode + RE.nodeSize, end ); } nextNodeOffset = instruction[ currentNode + RE.offsetNext ]; currentNode += nextNodeOffset; } // Return the node list return ret; } /** * Compiles a regular expression pattern into a program runnable by the pattern * matcher class 'RE'. * @param pattern Regular expression pattern to compile (see RECompiler class * for details). * @return A compiled regular expression program. * @exception RESyntaxException Thrown if the regular expression has invalid syntax. * @see RECompiler * @see RE */ public REProgram compile(String pattern) throws RESyntaxException { // Initialize variables for compilation this.pattern = pattern; // Save pattern in instance variable len = pattern.length(); // Precompute pattern length for speed idx = 0; // Set parsing index to the first character lenInstruction = 0; // Set emitted instruction count to zero parens = 1; // Set paren level to 1 (the implicit outer parens) brackets = 0; // No bracketed closures yet // Initialize pass by reference flags value int[] flags = { NODE_TOPLEVEL }; // Parse expression expr(flags); // Should be at end of input if (idx != len) { if (pattern.charAt(idx) == ')') { syntaxError("Unmatched close paren"); } syntaxError("Unexpected input remains"); } // Return the result char[] ins = new char[lenInstruction]; System.arraycopy(instruction, 0, ins, 0, lenInstruction); return new REProgram(parens, ins); } /** * Local, nested class for maintaining character ranges for character classes. */ class RERange { int size = 16; // Capacity of current range arrays int[] minRange = new int[size]; // Range minima int[] maxRange = new int[size]; // Range maxima int num = 0; // Number of range array elements in use /** * Deletes the range at a given index from the range lists * @param index Index of range to delete from minRange and maxRange arrays. */ void delete(int index) { // Return if no elements left or index is out of range if (num == 0 || index >= num) { return; } // Move elements down while (++index < num) { if (index - 1 >= 0) { minRange[index-1] = minRange[index]; maxRange[index-1] = maxRange[index]; } } // One less element now num--; } /** * Merges a range into the range list, coalescing ranges if possible. * @param min Minimum end of range * @param max Maximum end of range */ void merge(int min, int max) { // Loop through ranges for (int i = 0; i < num; i++) { // Min-max is subsumed by minRange[i]-maxRange[i] if (min >= minRange[i] && max <= maxRange[i]) { return; } // Min-max subsumes minRange[i]-maxRange[i] else if (min <= minRange[i] && max >= maxRange[i]) { delete(i); merge(min, max); return; } // Min is in the range, but max is outside else if (min >= minRange[i] && min <= maxRange[i]) { delete(i); min = minRange[i]; merge(min, max); return; } // Max is in the range, but min is outside else if (max >= minRange[i] && max <= maxRange[i]) { delete(i); max = maxRange[i]; merge(min, max); return; } } // Must not overlap any other ranges if (num >= size) { size *= 2; int[] newMin = new int[size]; int[] newMax = new int[size]; System.arraycopy(minRange, 0, newMin, 0, num); System.arraycopy(maxRange, 0, newMax, 0, num); minRange = newMin; maxRange = newMax; } minRange[num] = min; maxRange[num] = max; num++; } /** * Removes a range by deleting or shrinking all other ranges * @param min Minimum end of range * @param max Maximum end of range */ void remove(int min, int max) { // Loop through ranges for (int i = 0; i < num; i++) { // minRange[i]-maxRange[i] is subsumed by min-max if (minRange[i] >= min && maxRange[i] <= max) { delete(i); i--; return; } // min-max is subsumed by minRange[i]-maxRange[i] else if (min >= minRange[i] && max <= maxRange[i]) { int minr = minRange[i]; int maxr = maxRange[i]; delete(i); if (minr < min) { merge(minr, min - 1); } if (max < maxr) { merge(max + 1, maxr); } return; } // minRange is in the range, but maxRange is outside else if (minRange[i] >= min && minRange[i] <= max) { minRange[i] = max + 1; return; } // maxRange is in the range, but minRange is outside else if (maxRange[i] >= min && maxRange[i] <= max) { maxRange[i] = min - 1; return; } } } /** * Includes (or excludes) the range from min to max, inclusive. * @param min Minimum end of range * @param max Maximum end of range * @param include True if range should be included. False otherwise. */ void include(int min, int max, boolean include) { if (include) { merge(min, max); } else { remove(min, max); } } /** * Includes a range with the same min and max * @param minmax Minimum and maximum end of range (inclusive) * @param include True if range should be included. False otherwise. */ void include(char minmax, boolean include) { include(minmax, minmax, include); } } }
⏎ com/sun/org/apache/regexp/internal/RECompiler.java
Or download all of them as a single archive file:
File name: jre-rt-com-1.8.0_191-src.zip File size: 8099783 bytes Release date: 2018-10-28 Download
⇒ Backup JDK 8 Installation Directory
2023-02-07, 254034👍, 3💬
Popular Posts:
The Bouncy Castle Crypto package is a Java implementation of cryptographic algorithms, it was develo...
JDK 11 jdk.rmic.jmod is the JMOD file for JDK 11 RMI (Remote Method Invocation) Compiler Tool tool, ...
commons-lang-1.0.1.jar is the JAR file for Apache Commons Lang 1.0.1, which provides a host of helpe...
Where to find answers to frequently asked questions on Downloading and Using JDK (Java Development K...
What Is commons-fileupload-1.3.3 .jar?commons-fileupload-1.3.3 .jaris the JAR file for Apache Common...