//============================================================================== // CommandLexer.java //============================================================================== package tribble.net.ftp.shell; // System imports import java.io.IOException; import java.io.PrintWriter; import java.io.Reader; import java.io.Writer; import java.lang.Character; import java.lang.Integer; import java.lang.Exception; import java.lang.String; import java.lang.System; /******************************************************************************* * FTP command script lexical analyzer. * *
* See the package summary for details about * syntax and lexicon. * * * @version $Revision: 1.20 $ $Date: 2007/08/12 19:48:51 $ * @since API 1.0, 2007-03-14 * @author David R. Tribble (david@tribble.com). *
* Copyright ©2007 by David R. Tribble, all rights reserved.
* Permission is granted to any person or entity except those designated by
* by the United States Department of State as a terrorist, or terrorist
* government or agency, to use and distribute this source code provided
* that the original copyright notice remains present and unaltered.
*
* @see CommandParser
*/
class CommandLexer
implements CommandTokens
{
// Identification
/** Revision information. */
static final String REV =
"@(#)tribble/net/ftp/shell/CommandLexer.java $Revision: 1.20 $ $Date: 2007/08/12 19:48:51 $\n";
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Variables
/** Source name. */
private String m_srcName = "-";
/** Command script input source stream. */
private CommandFile m_in;
/** Error/warning message output stream. */
private PrintWriter m_out;
/** Input token buffer. */
private char[] m_buf = new char[2000+1];
/** Source line number of the last token read. */
int m_lineNo = 1;
/** Pushed-back token. */
private String m_nextTok;
/** Nesting depth of parethesized tokens. */
private int m_parenNest = 0;
/** Previous input character was whitespace. */
private boolean m_prevSP = true;
/** Previous token was a newline ({@link #TOK__NL}). */
private boolean m_prevNL = true;
/** Split the current word into multiple tokens. */
private boolean m_splitToken = false;
/** Current word is the left-most (first) token in the source line. */
private boolean m_leftmost = true;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Constructors
/***************************************************************************
* Constructor.
*
* @param in
* Command script input stream.
*
* @param out
* Error/warning message output stream.
*
* @since 1.1, 2007-03-14
*/
CommandLexer(Reader in, PrintWriter out)
{
// Initialize
m_in = new CommandFile(in, out, null);
m_out = out;
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Methods
/***************************************************************************
* Establish the name of this source stream (the command script).
*
* @param name
* Name of the input stream. This is usually a filename, but it can be any
* kind of identification for the source stream.
*
* @since 1.6, 2007-03-27
*/
void setSourceName(String name)
{
m_srcName = name;
m_in.setSourceName(name);
}
/***************************************************************************
* Establish whether or not token words in the current source input line are
* to be split or not.
*
*
* Note: This is something of a kludge, adding dirt to an otherwise fairly * clean lexer implementation. * * @param flag * True if subsequent token words are to be split into individual tokens, * false otherwise. * * @since 1.18, 2007-05-31 */ void splitWordTokens(boolean flag) { m_splitToken = flag; if (flag) { if (m_parenNest == 0) m_parenNest++; } else m_parenNest = 0; } /*************************************************************************** * Read the next input token from the source script. * This also updates {@link #m_lineNo} to reflect the source line number of * the returned token. * *
* Tokens are composed of simple keywords (e.g., get), or quoted * literals (e.g., "*.txt", 'get.*'). Tokens may contain * embedded variable sequences (e.g., "${name}"). Tokens may be * delimited by whitespace (spaces, tabs, and newlines), or are expressions * enclosed within parentheses ('('). * *
* Tokens cannot be longer than 2,000 characters. * *
* Blank lines are ignored. Comments start with # and end at the * end of the line (newline), and are ignored. * *
* Source lines can be split and continued on subsequent lines by preceding * the newline with a '`' (accent) escape character. Leading spaces * on the next line are ignored. * *
* Examples *
* *.txt -> *.txt * /bin/foo.ext -> /bin/foo.ext * $file.$ext -> $file.$ext * ($file.$ext) -> ( $file . $ext ) * a#bc #xyz -> a#bc nl * 'abc'.'def' -> "abc" .'def' * ('abc'.'def') -> "abc" . "def" * x+y/z*2 -> 'x+y/z*2' * (x+y/z*2) -> ( x + y / z * 2 ) * (a/b)+c/d -> ( a / b ) +c/d * &($s+1) -> & ( $s + 1 ) * "&($s+1)" -> "&($s+1)" * drt@foo.com -> drt@foo.com * (drt@foo.com) -> ( drt @foo . com ) * -123+45 +$foo -> - 123 + 45 + $foo * don`'t -> don`'t * 'foo`$bar`$' -> 'foo`$bar`$' * c: cd c: -> c : cd c:* * @return * The next token text, or {@link #TOK__NL} if an end-of-line (newline) was * read, or null if the end of the source stream was reached. * * @since 1.1, 2007-03-11 */ String readToken() throws IOException { String tok; int ch; // Check for a pushed-back token if (m_nextTok != null) { tok = m_nextTok; m_nextTok = null; return (tok); } // Read the next token from the source stream GET_TOKEN: for (;;) { // Skip whitespace m_lineNo = m_in.m_lineNo; ch = readChar(); while (ch == ' ' || ch == '\t') { m_prevSP = true; if (m_parenNest < 1) m_splitToken = false; m_lineNo = m_in.m_lineNo; ch = readChar(); } // Handle comments if (ch == '#' && m_prevSP) { // Read and ignore a comment, up to the next newline while (ch != '\n') ch = m_in.readChar(); // Don't use this.readChar() } // Handle end of line (newline) if (ch == '\n') { // Newline token, skip multiple newlines if (m_prevNL) continue GET_TOKEN; m_prevNL = true; m_prevSP = true; m_splitToken = false; m_parenNest = 0; m_leftmost = true; return (TOK__NL); } else { // Not a newline, so read the rest of the token m_prevNL = false; break GET_TOKEN; } } // Handle end of input if (ch < 0) { m_buf = null; return (null); } // Read a complete token m_prevSP = false; if (ch == '"' || ch == '\'') { // Read a quoted string literal tok = readString(ch); } else if (ch == '(' || ch == '>' || ch == '<' || ch == '&' || ch == '\u00A7' /*'§'*/ || (ch == '!' && m_leftmost)) { // Split the next word into multiple tokens following a prefix token m_splitToken = true; tok = readWordToken(ch); } else { // Read a normal word token, up to the next whitespace tok = readWordToken(ch); } m_leftmost = false; return (tok); } /*************************************************************************** * Push back the last token read from the input source. * * @see #readToken readToken() * * @since 1.1, 2007-03-13 */ void unReadToken(String tok) { m_nextTok = tok; } /*************************************************************************** * Read a string token from the source script. * * @return * The string token text, including the surrounding quote characters. * * @since 1.3, 2007-03-15 */ private String readString(int ch) throws IOException { String tok; char quote; int len; int bufi; // Read a quoted string token, up to the closing quote quote = (char) ch; bufi = 0; len = m_buf.length-1; ch = '"'; do { int ich; // Append the char to the string literal if (bufi < len) m_buf[bufi] = (char) ch; else if (bufi == len) { m_out.println(m_srcName + ":" + m_lineNo + ": warning: String literal truncated (" + len + ")"); m_out.flush(); } bufi++; // Read the next source char ich = ch; ch = readChar(); // Handle escape sequences: '$x' and '`x' if ((ich == '$' || ich == '`') && ch != '\n') ch += 0x10000; } while (ch != quote && ch != '\n'); // Terminate the string literal bufi = (bufi <= len ? bufi : len); m_buf[bufi++] = '"'; if (ch != quote) { unReadChar(ch); m_out.println(m_srcName + ":" + m_lineNo + ": warning: String literal missing its closing '" + quote + "'"); m_out.flush(); } tok = new String(m_buf, 0, bufi); return (tok); } /*************************************************************************** * Read the next word-like token from the source script. * *
* Tokens cannot be longer than 2,000 characters. * * @return * The next word token. * * @since 1.8, 2007-04-03 */ private String readWordToken(int ch) throws IOException { String tok = null; int len = m_buf.length; int bufi; // Read the next word-like token if (m_splitToken) { // Split the current word into multiple tokens bufi = 0; m_buf[bufi++] = (char) ch; switch (ch) { case '(': m_parenNest++; tok = TOK_LP; break; case ')': if (m_parenNest > 0) m_parenNest--; tok = TOK_RP; break; case '&': case '\u00A7': // '§' // Single-character session operator m_splitToken = false; tok = new String(m_buf, 0, bufi); break; case '>': case '<': // '<', '<=', '<<', '>', '>=', or '>>' ch = readChar(); if (ch == m_buf[bufi-1]) { m_buf[bufi++] = (char) ch; m_splitToken = false; } else if (ch == '=') m_buf[bufi++] = (char) ch; else unReadChar(ch); tok = new String(m_buf, 0, bufi); break; case '!': // '!', '!=', or '!~' ch = readChar(); if (ch == '=' || ch == '~') m_buf[bufi++] = (char) ch; else unReadChar(ch); tok = new String(m_buf, 0, bufi); break; case '*': case '/': case '%': case '+': case '-': case '~': case '.': case ';': case ',': case '[': case ']': // Single-character operator tok = new String(m_buf, 0, bufi); break; case ':': default: // Read a split word token, up to the next delimiter bufi--; do { if (bufi < len) m_buf[bufi] = (char) ch; else if (bufi == len) { m_out.println(m_srcName + ":" + m_lineNo + ": warning: Word truncated (" + len + ")"); m_out.flush(); } bufi++; if (ch == '$' || ch == '`') { ch = readChar(); if (ch != '\n' && bufi < len) m_buf[bufi++] = (char) ch; } ch = readChar(); } while (Character.isLetterOrDigit((char) ch) || ch == '_' || ch == '$' || ch == '`' || ch == '{' || ch == '}' || ch == '#'); unReadChar(ch); bufi = (bufi <= len ? bufi : len); tok = new String(m_buf, 0, bufi); break; } } else { // Read a normal word token, up to the next whitespace bufi = 0; do { if (m_leftmost && ch == ',' && bufi > 0) break; if (bufi < len) m_buf[bufi] = (char) ch; else if (bufi == len) { m_out.println(m_srcName + ":" + m_lineNo + ": warning: Word truncated (" + len + ")"); m_out.flush(); } bufi++; ch = readChar(); } while (ch != ' ' && ch != '\n'); unReadChar(ch); bufi = (bufi <= len ? bufi : len); tok = new String(m_buf, 0, bufi); } return (tok); } /*************************************************************************** * Read the next input character from the source script. * *
* Each newline (end of line) sequence (CR, LF, CR/LF) is translated into a * single newline character ('\n'). A newline character * ('\n') is always returned as the last character before the * end of the input stream, even if the stream does not contain a final * newline sequence. * *
* Source lines can be split and continued on subsequent lines by preceding * the newline with a '`' (accent) escape character. Leading spaces * on the next line are ignored. * * @return * The next character from the input source stream, or '\n' if an * end of line (newline) is reached, or -1 if the end of the source stream * is reached. Note that the stream is not closed after the end is * reached. * * @since 1.11, 2007-04-09 */ private int readChar() throws IOException { int ch; // Check the input stream if (m_in == null) return (-1); // Read the next char from the source stream ch = m_in.readChar(); for (;;) { // Handle split/continued lines, given by a '`+newline' sequence if (ch != '`') return (ch); ch = m_in.readChar(); if (ch != '\n') { // Not a '`+newline' sequence m_in.unReadChar(ch); return ('`'); } // Source line is split, ignore leading spaces on continued line do { ch = m_in.readChar(); } while (ch == ' ' || ch == '\t'); } } /*************************************************************************** * Push back an input character from the source script. * * @since 1.11, 2007-04-09 */ private void unReadChar(int ch) { m_in.unReadChar(ch); } } // End CommandLexer.java