//==============================================================================
// ExprLexer.java
//==============================================================================

package tribble.parse.sql;

// System imports
import java.lang.Character;
import java.lang.Exception;
import java.lang.String;
import java.lang.StringBuffer;
import java.lang.System;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

// Local imports
// (None)

/*******************************************************************************
 * Utility methods for SQL-like query expression parsing.
 *
 *
 * @version  $Revision: 1.5 $ $Date: 2007/08/01 03:08:20 $
 * @since    2001-03-12
 * @author   David R. Tribble (david@tribble.com).
 *           <br>
 *           Copyright &copy;2001 by David R. Tribble, all rights reserved.
 *           <br>
 *           Permission is granted to any person or entity except those designated by
 *           by the United States Department of State as a terrorist, or terrorist
 *           government or agency, to use and distribute this source code provided
 *           that the original copyright notice remains present and unaltered.
 */
public abstract class ExprLexer
{
    // Identification

    /** Revision information. */
    static final String REV =
        "@(#)tribble/parse/sql/ExprLexer.java $Revision: 1.5 $ $Date: 2007/08/01 03:08:20 $\n";

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Constants

    // (None)

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Public static methods

    /***************************************************************************
     * Test driver for this class.
     *
     * <p>
     * <b>Usage</b>
     * <p>
     * <code>
     *    java tribble.parse.sql.ExprLexer <i>textline</i>...
     * </code>
     *
     * <p>
     * This splits each <i>textline</i> into separate tokens and prints them,
     * one per line, to standard output. A <i>textline</i> that contains no
     * tokens at all (empty or only whitespace) simply prints no token lines.
     *
     * @param args
     * Command line arguments.
     *
     * @throws Exception
     * Thrown if an error occurs.
     *
     * @since 1.3, 2001-04-16
     */
    public static void main(String[] args)
        throws Exception
    {
        // Parse each textline arg
        for (int i = 0; i < args.length; i++)
        {
            String textline = args[i];

            System.out.println("\"" + textline + "\"");

            try
            {
                // getTokens() returns null (not an empty array) when the
                // line holds no tokens, so guard before dereferencing
                String[] toks = getTokens(textline);
                int count = (toks == null) ? 0 : toks.length;

                for (int j = 0; j < count; j++)
                    System.out.println(" " + j + " \"" + toks[j] + "\"");
            }
            catch (ParseException ex)
            {
                // Malformed token encountered
                System.out.println("*** Error at offset " + ex.getErrorOffset());
                ex.printStackTrace(System.out);
            }

            System.out.println();
        }
    }

    /***************************************************************************
     * Extract the next word token from an SQL-like query expression.
     *
     * <p>
     * Words may be quoted by surrounding them with single quote
     * (<code>'</code>) or double quote (<code>"</code>) characters; the
     * surrounding quotes are retained in the extracted token. Names may be
     * quoted with backquote (<code>`</code>) characters, which are stripped
     * from the extracted token. If a word contains its own quote character,
     * the embedded quote character must be preceded by a backslash character
     * (<code>\</code>). (Embedded backslash characters should not be preceded
     * by another backslash, however.) Within an unquoted word, a backslash
     * causes the character following it to be taken literally.
     *
     * <p>
     * <b>Example</b>
     *
     * <p>
     * Consider the following input text line:
     * <pre>
     *    Date\ 2 = "2001-01-01" &amp; `I.D.` like '%\'s' &amp; Rec.Sz &gt;= 80
     *    0123456789_123456789_123456789_123456789_123456789_1234567
     * </pre>
     *
     * The line above contains the following word tokens, starting at the
     * string offsets shown:
     * <pre>
     *     0: "Date 2"
     *     8: "="
     *    10: "\"2001-01-01\""
     *    23: "&amp;"
     *    25: "I.D."
     *    32: "like"
     *    37: "'%'s'"
     *    44: "&amp;"
     *    46: "Rec"
     *    49: "."
     *    50: "Sz"
     *    53: "&gt;="
     *    56: "80"
     * </pre>
     *
     * @param line
     * A string containing one or more SQL-like query expression word tokens
     * separated by whitespace.
     *
     * @param pos
     * Position within <code>line</code> where token parsing is to begin.
     *
     * @param tok
     * The text of the extracted word token is returned in this object. Any
     * previous contents are discarded.
     *
     * @return
     * The position of the character following the last one parsed from
     * <code>line</code>, or zero if there are no more word tokens to extract
     * from the line. The returned value can serve as the starting parse
     * position in a subsequent call to this method.
     *
     * @throws ParseException
     * Thrown if a token is malformed, such as missing a closing quote or
     * ending in a dangling backslash.
     *
     * @see #getTokens
     *
     * @since 1.1, 2001-03-12
     */
    public static int getToken(String line, int pos, StringBuffer tok)
        throws ParseException
    {
        tok.setLength(0);

        // Skip leading spaces
        final int len = line.length();
        int start = pos;

        while (start < len  &&  isSpace(line.charAt(start)))
            start++;

        // Check for end of line
        if (start >= len)
            return (0);

        // Dispatch on the kind of token that starts here
        char first = line.charAt(start);

        if (first == '"'  ||  first == '\''  ||  first == '`')
            return (scanQuoted(line, start, tok));
        return (scanBare(line, start, tok));
    }

    /***************************************************************************
     * Split an SQL-like query expression into separate word tokens.
     *
     * <p>
     * Words may be quoted by surrounding them with single quote
     * (<code>'</code>) or double quote (<code>"</code>) characters. If a word
     * contains either of these characters, it must be quoted, and the embedded
     * quote characters must be preceded by a backslash character
     * (<code>\</code>). (Embedded backslash characters should not be preceded
     * by another backslash, however.)
     *
     * <p>
     * <b>Example</b>
     *
     * <p>
     * Consider the following input text line:
     * <pre>
     *    Date\ 2 = "2001-01-01" &amp; `I.D.` like '%\'s' &amp; Rec.Sz &gt;= 80
     * </pre>
     *
     * The line above is split into the following tokens:
     * <pre>
     *    [0]:  "Date 2"
     *    [1]:  "="
     *    [2]:  "\"2001-01-01\""
     *    [3]:  "&amp;"
     *    [4]:  "I.D."
     *    [5]:  "like"
     *    [6]:  "'%'s'"
     *    [7]:  "&amp;"
     *    [8]:  "Rec"
     *    [9]:  "."
     *    [10]: "Sz"
     *    [11]: "&gt;="
     *    [12]: "80"
     * </pre>
     *
     * @param line
     * A string containing one or more text tokens separated by whitespace.
     *
     * @return
     * An array of strings, representing the tokens extracted from
     * <code>line</code>, or null if the input line contains no tokens (it is
     * empty or holds only whitespace).
     *
     * @throws ParseException
     * Thrown if a token is malformed, such as missing a closing quote.
     *
     * @see #getToken
     *
     * @since 1.1, 2001-03-12
     */
    public static String[] getTokens(String line)
        throws ParseException
    {
        List<String> words = new ArrayList<String>(10);
        StringBuffer tok = new StringBuffer(20);

        // getToken() returns zero once the line is exhausted
        int next = getToken(line, 0, tok);

        while (next > 0)
        {
            words.add(tok.toString());
            next = getToken(line, next, tok);
        }

        // Existing callers rely on null (not an empty array) for "no tokens"
        if (words.isEmpty())
            return (null);
        return (words.toArray(new String[words.size()]));
    }

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Private static methods

    /***************************************************************************
     * Determine whether a character is one of the whitespace characters that
     * separate tokens (space, tab, newline, carriage return, or formfeed).
     */
    private static boolean isSpace(char ch)
    {
        return (ch == ' '  ||  ch == '\t'  ||  ch == '\n'  ||  ch == '\r'
            ||  ch == '\f');
    }

    /***************************************************************************
     * Scan a quoted token whose opening quote character is at
     * <code>line.charAt(start)</code>, appending its text to <code>tok</code>.
     * Single and double quotes are kept as part of the token text; backquotes
     * are dropped.
     *
     * @return
     * The position just past the closing quote.
     *
     * @throws ParseException
     * Thrown if the line ends before an unescaped closing quote is found.
     */
    private static int scanQuoted(String line, int start, StringBuffer tok)
        throws ParseException
    {
        final int len = line.length();
        final char quote = line.charAt(start);
        final boolean keepQuotes = (quote != '`');
        int i = start + 1;

        if (keepQuotes)
            tok.append(quote);

        while (i < len)
        {
            char ch = line.charAt(i++);

            if (ch == quote)
            {
                // Closing quote, the only successful way out of this method
                if (keepQuotes)
                    tok.append(quote);
                return (i);
            }

            if (ch != '\\')
            {
                // Ordinary token char
                tok.append(ch);
            }
            else if (i < len)
            {
                if (line.charAt(i) == quote)
                {
                    // Escaped quote char, it does not close the token
                    tok.append(quote);
                    i++;
                }
                else
                {
                    // Plain backslash; the char after it is scanned normally
                    tok.append('\\');
                }
            }
            // A backslash that ends the line falls out of the loop below
        }

        // Ran off the end of the line without seeing a real closing quote
        throw new ParseException("Missing closing quote (" + quote + ")", i);
    }

    /***************************************************************************
     * Scan an unquoted name, number, or punctuation token starting at
     * <code>line.charAt(start)</code> (which must not be whitespace),
     * appending its text to <code>tok</code>.
     *
     * @return
     * The position of the first character not consumed by the token.
     *
     * @throws ParseException
     * Thrown if the line ends immediately after a backslash.
     */
    private static int scanBare(String line, int start, StringBuffer tok)
        throws ParseException
    {
        final int len = line.length();
        int i = start;

        while (i < len)
        {
            final char ch = line.charAt(i);

            // Delimiter space, end of token
            if (isSpace(ch))
                return (i);

            switch (ch)
            {
            case '"':
            case '\'':
            case ',':
            case '(':
            case ')':
            case '[':
            case ']':
            case '&':
            case '/':
            case '+':
            case '-':
                // Delimiter punctuation: a token by itself, or the end of
                // the token collected so far
                if (tok.length() == 0)
                {
                    tok.append(ch);
                    i++;
                }
                return (i);

            case '*':
            case '|':
                // As above, but a doubled char forms the '**' or '||' operator
                if (tok.length() == 0)
                {
                    tok.append(ch);
                    i++;
                    if (i < len  &&  line.charAt(i) == ch)
                    {
                        tok.append(ch);
                        i++;
                    }
                }
                return (i);

            case '.':
                if (tok.length() == 0)
                {
                    // Leading '.', a decimal point only if a digit follows
                    tok.append(ch);
                    i++;
                    if (i >= len  ||  !Character.isDigit(line.charAt(i)))
                        return (i);
                }
                else if (isSignedDigits(tok))
                {
                    // Numeric decimal point within a number
                    tok.append(ch);
                    i++;
                }
                else
                {
                    // Delimiter punctuation, left for the next call
                    return (i);
                }
                break;

            case '\\':
                // Escaped token char, taken literally
                i++;
                if (i >= len)
                    throw new ParseException("Missing escaped character", i);
                tok.append(line.charAt(i++));
                break;

            default:
                // Token char
                tok.append(ch);
                i++;
                break;
            }
        }

        return (i);
    }

    /***************************************************************************
     * Determine whether the (non-empty) token text collected so far consists
     * only of decimal digits, optionally preceded by a single
     * <code>'+'</code> or <code>'-'</code> sign.
     */
    private static boolean isSignedDigits(StringBuffer tok)
    {
        final int n = tok.length();
        char sign = tok.charAt(0);
        int j = (sign == '+'  ||  sign == '-') ? 1 : 0;

        for ( ; j < n; j++)
        {
            if (!Character.isDigit(tok.charAt(j)))
                return (false);
        }
        return (true);
    }

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Members

    // (None)

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Constructors

    /***************************************************************************
     * Default constructor.
     * Private, since this class holds only static utility methods.
     *
     * @since 1.1, 2002-11-05
     */
    private ExprLexer()
    {
        // Do nothing
    }

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Methods

    // (None)
}

// End ExprLexer.java