//============================================================================== // ExprLexer.java //============================================================================== package tribble.parse.sql; // System imports import java.lang.Character; import java.lang.Exception; import java.lang.String; import java.lang.StringBuffer; import java.lang.System; import java.text.ParseException; import java.util.Vector; // Local imports // (None) /******************************************************************************* * Utility methods for SQL-like query expression parsing. * * * @version $Revision: 1.5 $ $Date: 2007/08/01 03:08:20 $ * @since 2001-03-12 * @author David R. Tribble (david@tribble.com). *
* Copyright ©2001 by David R. Tribble, all rights reserved.
* Permission is granted to any person or entity except those designated by
* by the United States Department of State as a terrorist, or terrorist
* government or agency, to use and distribute this source code provided
* that the original copyright notice remains present and unaltered.
*/
public abstract class ExprLexer
{
    // Identification

    /** Revision information. */
    static final String REV =
        "@(#)tribble/parse/sql/ExprLexer.java $Revision: 1.5 $ $Date: 2007/08/01 03:08:20 $\n";

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Public static methods

    /***************************************************************************
    * Test driver for this class.
    *
    * <p>
    * Usage: <tt>java tribble.parse.sql.ExprLexer <i>textline</i>...</tt>
    *
    * <p>
    * Each <i>textline</i> argument is echoed, split into tokens with
    * {@link #getTokens}, and the tokens are printed one per line together
    * with their index. A malformed argument is reported (offset and stack
    * trace) on standard output and processing continues with the next one.
    *
    * @param args
    * Command line arguments, each one a text line to tokenize.
    *
    * @throws Exception
    * Thrown if an error occurs.
    *
    * @since 1.3, 2001-04-16
    */
    public static void main(String[] args)
        throws Exception
    {
        // Parse each textline arg
        for (int i = 0; i < args.length; i++)
        {
            try
            {
                String textline = args[i];

                System.out.println("\"" + textline + "\"");

                // getTokens() returns null (not an empty array) when the
                // line holds no tokens, so guard before iterating
                String[] toks = getTokens(textline);
                if (toks != null)
                {
                    for (int j = 0; j < toks.length; j++)
                        System.out.println(" " + j + " \"" + toks[j] + "\"");
                }
            }
            catch (ParseException ex)
            {
                // Malformed token encountered
                System.out.println("*** Error at offset "
                    + ex.getErrorOffset());
                ex.printStackTrace(System.out);
            }
            System.out.println();
        }
    }

    /***************************************************************************
    * Extract the next word token from an SQL-like query expression.
    *
    * <p>
    * Tokens come in three flavors:
    * <ul>
    * <li>String literals, surrounded by single quote (') or double quote (")
    * characters. The surrounding quotes are kept as part of the token text.
    * <li>Quoted names, surrounded by backquote (`) characters. The
    * backquotes are <i>not</i> part of the token text.
    * <li>Bare words, numbers and punctuation. Whitespace ends a bare token;
    * the characters <tt>" ' , ( ) [ ] &amp; / + - * |</tt> each form a token
    * of their own (<tt>**</tt> and <tt>||</tt> are recognized as
    * two-character tokens). A period is kept inside a token only when it
    * acts as a decimal point (the token so far is an optionally signed run
    * of digits, or the period starts the token and a digit follows);
    * otherwise it is a token of its own. A backslash causes the character
    * after it to be taken literally.
    * </ul>
    *
    * <p>
    * Within a quoted token, the quote character itself must be preceded by a
    * backslash (\). A backslash followed by any other character is an
    * ordinary backslash, so embedded backslashes must not be doubled.
    *
    * <p>
    * <b>Example</b>
    *
    * <p>
    * Consider the following input text line:
    * <pre>{@code
    *   Date\ 2 = "2001-01-01" & `I.D.` like '%\'s' & Rec.Sz >= 80
    *   0123456789_123456789_123456789_123456789_123456789_1234567
    * }</pre>
    *
    * The line above contains the following word tokens, starting at the
    * string offsets shown:
    * <pre>{@code
    *    0: "Date 2"
    *    8: "="
    *   10: "\"2001-01-01\""
    *   23: "&"
    *   25: "I.D."
    *   32: "like"
    *   37: "'%'s'"
    *   44: "&"
    *   46: "Rec"
    *   49: "."
    *   50: "Sz"
    *   53: ">="
    *   56: "80"
    * }</pre>
    *
    * @param line
    * A string containing one or more SQL-like query expression word tokens
    * separated by whitespace.
    *
    * @param pos
    * Position within <tt>line</tt> where token parsing is to begin.
    *
    * @param tok
    * The text of the extracted word token is returned in this object. Any
    * previous contents are discarded.
    *
    * @return
    * The position of the character following the last one parsed from
    * <tt>line</tt>, or zero if there are no more word tokens to extract from
    * the line. The returned value can serve as the starting parse position in
    * a subsequent call to this method.
    *
    * @throws ParseException
    * Thrown if a token is malformed: a quoted token has no closing quote
    * (including when the only candidate closing quote is escaped by a
    * backslash), or a bare token ends in a dangling backslash.
    *
    * @see #getTokens
    *
    * @since 1.1, 2001-03-12
    */
    public static int getToken(String line, int pos, StringBuffer tok)
        throws ParseException
    {
        final int len = line.length();
        int i = pos;

        tok.setLength(0);

        // Skip leading whitespace
        while (i < len && isSpace(line.charAt(i)))
            i++;

        // Check for end of line
        if (i >= len)
            return (0);

        // Dispatch on the first significant character
        char first = line.charAt(i);

        if (first == '"' || first == '\'')
        {
            // Quoted string literal, the quotes belong to the token
            tok.append(first);
            return (scanQuoted(line, i + 1, first, tok));
        }
        if (first == '`')
        {
            // Quoted name, the backquotes are dropped
            return (scanQuoted(line, i + 1, first, tok));
        }

        // Name, number, or punctuation
        return (scanBare(line, i, tok));
    }

    /***************************************************************************
    * Split an SQL-like query expression into separate word tokens.
    *
    * <p>
    * The line is tokenized by calling {@link #getToken} repeatedly until it
    * reports that no tokens remain; see that method for the token rules.
    *
    * <p>
    * <b>Example</b>
    *
    * <p>
    * Consider the following input text line:
    * <pre>{@code
    *   Date\ 2 = "2001-01-01" & `I.D.` like '%\'s' & Rec.Sz >= 80
    * }</pre>
    *
    * The line above is split into the following tokens:
    * <pre>{@code
    *   [0]:  "Date 2"
    *   [1]:  "="
    *   [2]:  "\"2001-01-01\""
    *   [3]:  "&"
    *   [4]:  "I.D."
    *   [5]:  "like"
    *   [6]:  "'%'s'"
    *   [7]:  "&"
    *   [8]:  "Rec"
    *   [9]:  "."
    *   [10]: "Sz"
    *   [11]: ">="
    *   [12]: "80"
    * }</pre>
    *
    * @param line
    * A string containing one or more text tokens separated by whitespace.
    *
    * @return
    * An array of strings, representing the tokens extracted from
    * <tt>line</tt>, or null if the input line contains no tokens (it is
    * empty or consists only of whitespace).
    *
    * @throws ParseException
    * Thrown if a token is malformed, such as missing a closing quote.
    *
    * @see #getToken
    *
    * @since 1.1, 2001-03-12
    */
    public static String[] getTokens(String line)
        throws ParseException
    {
        Vector words = new Vector(10);
        StringBuffer tok = new StringBuffer(20);
        int next = 0;

        // getToken() returns zero once the line is exhausted; every
        // successful call consumes at least one character, so the returned
        // position is always positive and strictly advancing
        while ((next = getToken(line, next, tok)) > 0)
            words.add(tok.toString());

        // An empty result is reported as null, per the documented contract
        if (words.isEmpty())
            return (null);

        String[] arr = new String[words.size()];
        words.toArray(arr);
        return (arr);
    }

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Private static methods

    /***************************************************************************
    * Determine whether a character is one of the whitespace characters that
    * separate tokens (space, tab, newline, carriage return, form feed).
    */
    private static boolean isSpace(char ch)
    {
        return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
            || ch == '\f');
    }

    /***************************************************************************
    * Scan the remainder of a quoted token, starting just past its opening
    * quote, and return the position just past its closing quote.
    *
    * <p>
    * The method returns normally only after consuming an unescaped closing
    * quote. A quote that was escaped by a backslash never terminates the
    * token, even if it is the last character of the line.
    *
    * @throws ParseException
    * Thrown if the line ends before an unescaped closing quote is found.
    */
    private static int scanQuoted(String line, int i, char quote,
            StringBuffer tok)
        throws ParseException
    {
        final int len = line.length();

        while (i < len)
        {
            char ch = line.charAt(i++);

            if (ch == quote)
            {
                // Closing quote; backquotes are not part of the token text
                if (quote != '`')
                    tok.append(ch);
                return (i);
            }

            if (ch != '\\')
            {
                // Token char
                tok.append(ch);
            }
            else if (i >= len)
            {
                // Dangling backslash, so the quote can never be closed
                break;
            }
            else if (line.charAt(i) == quote)
            {
                // Escaped quote char, consume it along with the backslash
                tok.append(quote);
                i++;
            }
            else
            {
                // Plain backslash; the following character is left in
                // place and handled by the next iteration
                tok.append('\\');
            }
        }

        throw new ParseException("Missing closing quote (" + quote + ")", i);
    }

    /***************************************************************************
    * Scan an unquoted token (name, number, or punctuation) starting at
    * position <tt>i</tt>, which must not be whitespace, and return the
    * position just past the token.
    *
    * @throws ParseException
    * Thrown if the line ends immediately after an escaping backslash.
    */
    private static int scanBare(String line, int i, StringBuffer tok)
        throws ParseException
    {
        final int len = line.length();

        while (i < len)
        {
            char ch = line.charAt(i++);

            switch (ch)
            {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\f':
                // Delimiter space, end of token (the space is not consumed)
                return (i - 1);

            case '"':
            case '\'':
            case ',':
            case '(':
            case ')':
            case '[':
            case ']':
            case '&':
            case '/':
            case '+':
            case '-':
                // Delimiter punctuation: a token by itself, or the end of
                // the token collected so far
                if (tok.length() > 0)
                    return (i - 1);
                tok.append(ch);
                return (i);

            case '*':
            case '|':
                // Like the above, but a doubled character ('**' or '||')
                // forms a single two-character operator token
                if (tok.length() > 0)
                    return (i - 1);
                tok.append(ch);
                if (i < len && line.charAt(i) == ch)
                {
                    tok.append(ch);
                    i++;
                }
                return (i);

            case '.':
                if (tok.length() == 0)
                {
                    // Leading period: a decimal point only if a digit
                    // follows, otherwise a punctuation token
                    tok.append(ch);
                    if (i < len && Character.isDigit(line.charAt(i)))
                        break;
                    return (i);
                }
                if (!isSignedDigits(tok))
                {
                    // Period after a non-numeric token is a delimiter
                    return (i - 1);
                }
                // Numeric decimal pt
                tok.append('.');
                break;

            case '\\':
                // Escaped token char, taken literally
                if (i >= len)
                    throw new ParseException("Missing escaped character", i);
                tok.append(line.charAt(i++));
                break;

            default:
                // Token char
                tok.append(ch);
                break;
            }
        }

        // End of line ends the token
        return (i);
    }

    /***************************************************************************
    * Determine whether a non-empty token consists solely of decimal digits,
    * optionally preceded by a single '+' or '-' sign. A following period is
    * then treated as a decimal point rather than as punctuation.
    */
    private static boolean isSignedDigits(StringBuffer tok)
    {
        final int n = tok.length();
        char lead = tok.charAt(0);
        int j = (lead == '+' || lead == '-') ? 1 : 0;

        for ( ; j < n; j++)
        {
            if (!Character.isDigit(tok.charAt(j)))
                return (false);
        }
        return (true);
    }

    // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    // Constructors

    /***************************************************************************
    * Default constructor.
    * Private, since this class only provides static utility methods and is
    * not meant to be instantiated.
    *
    * @since 1.1, 2002-11-05
    */
    private ExprLexer()
    {
        // Do nothing
    }
}
// End ExprLexer.java