///+INCOMPLETE //============================================================================== // tribble/io/Lr1Parser.java //------------------------------------------------------------------------------ package tribble.io; // System imports import java.io.IOException; import java.lang.Error; import java.lang.Exception; import java.lang.String; import java.lang.Throwable; // Local imports import tribble.io.DiagnosticOutputI; import tribble.io.DiagnosticWriterI; import tribble.io.LexerI; import tribble.io.Lr1ParserI; import tribble.io.TokenAdapter; import tribble.io.TokenI; /******************************************************************************* * Generic LR(1) parser. * *
* This implements an LR(1) parser, as a deterministic finite automaton (DFA)
* with a push-down stack. It reads tokens from an input source stream
* (implementing the {@link LexerI} interface).
*
* @version $Revision: 1.7 $ $Date: 2003/02/26 04:42:09 $
* @since 2001-08-11
* @author
* David R. Tribble,
* david@tribble.com
*
* Copyright
* ©2002-2003 by David R. Tribble, all rights reserved.
*
* Permission is granted to freely use and distribute this source code
* provided that the original copyright and authorship notices remain
* intact.
*
* @see LexerI
* @see Lr1Parser
*/
public abstract class Lr1Parser
implements Lr1ParserI, DiagnosticWriterI
{
// Identification
/** Revision information. */
static final String REV =
"@(#)tribble/io/Lr1Parser.java $Revision: 1.7 $ $Date: 2003/02/26 04:42:09 $\n";
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Protected constants
/** LR(1) parsing table format number. */
protected static final int _SERIES = 100;
//----------------------------------
// Reserved terminal symbols
/** Reserved token code: $end. */
protected static final short _T_END = -1;
/** Reserved token code: error. */
protected static final short _T_ERROR = -2;
/** Reserved token code: $any. */
protected static final short _T_ANY = -3;
/** Reserved token code: $empty. */
protected static final short _T_EMPTY = -4;
//----------------------------------
// Reserved action numbers
/** Reserved action number: $accept. */
protected static final short _A_ACCEPT = 0;
/** Reserved action number: $error. */
protected static final short _A_ERROR = 9999;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Protected variables
//----------------------------------
// Parser tables
// (with initial values supplied by derived subclasses)
/** Terminal symbol names. */
protected final String[] _TERM_ID = null;
/** Nonterminal symbol names. */
protected final String[] _NONTERM_ID = null;
/** Rule names. */
protected final String[] _RULE_ID = null;
/** Rule RHS lengths. */
protected final short[] _RHS_LEN = null;
/** State transition lookup table. */
protected final short[] _ACTION_LOOKUP = null;
/** State transition action table. */
protected final short[] _ACTION = null;
/** State reduction goto lookup table. */
protected final short[] _GOTO_LOOKUP = null;
/** State reduction goto table. */
protected final short[] _GOTO = null;
//----------------------------------
// Parser DFA objects
/** State stack, containing DFS state numbers. */
protected short[] _sStack;
/** Terminal (token) stack, containing input terminal symbols. */
protected TokenI[] _tStack;
/**
* Nonterminal stack, of user-defined type '%stacktype'.
* This is supplied by derived subclasses.
*/
protected Object[] _nStack;
/** Current stack pointer (for all three stacks). */
protected int _sp;
/** Current DFA state number. */
protected int _state;
/**
* Rule reduction result value ('$$'), of user-defined type
* '%stacktype'.
* This is supplied by derived subclasses.
*/
protected Object _r;
/** Current lookahead token. */
protected TokenI _tok;
/** Current token code (type). */
protected int _tokCode = _T_END;
/** Current token line number. */
protected int _tokLine;
//----------------------------------
// I/O streams
/** Lexer input stream. */
protected LexerI _lexer;
/** Input source (file) name. */
protected String _sourceName;
/** Output stream. */
protected DiagnosticOutputI _out;
/** Enable debugging tracing output. */
protected boolean _debug;
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Protected constructors
/***************************************************************************
* Constructor.
*
* @param series
* LR(1) parser table format number. This must be compatible with
* {@link #_SERIES}.
*
* @since 1.1, 2002-09-30
*/
protected Lr1Parser(int series)
{
// Check that the series number is compatible with this class
if (series/100 > _SERIES/100)
throw new Error("Incompatible parser(" + _SERIES + ") and table("
+ series + ") series");
}
/***************************************************************************
* Constructor.
*
* @param series
* LR(1) parser table format number. This must be compatible with
* {@link #_SERIES}.
*
* @param in
* The lexer input stream from which to read tokens.
*
* @since 1.1, 2002-09-30
*/
protected Lr1Parser(int series, LexerI in)
{
this(series);
// Establish the lexer input for this parser
setInput(in);
}
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Public methods
/***************************************************************************
* Establish the output stream to which diagnostic messages (warning and
* error messages) are written during parsing.
*
* @param out
* The error output stream.
*
*
* @since 1.1, 2001-08-11
*/
public void setOutput(DiagnosticOutputI out)
//implements tribble.io.DiagnosticWriterI
{
// Set the output stream
_out = out;
}
/***************************************************************************
* Establish the lexical analyzer (lexer) input stream from which to read
* tokens during parsing.
*
* @param in
* The lexer input stream from which to read tokens.
*
* @since 1.1, 2001-08-11
*/
public void setInput(LexerI in)
//implements tribble.io.Lr1ParserI
{
// Set the lexer input stream
_lexer = in;
}
/***************************************************************************
* Establish the input source filename.
*
* @param fname
* The source filename.
*
* @since 1.1, 2001-08-11
*/
public void setSourceName(String fn)
//implements tribble.io.Lr1ParserI
{
// Set the input source filename
if (_debug && _out != null)
_out.printMessage(_sourceName, _tokLine, "[yaccm] setSourceName("
+ (_out != null ? "\"" + fn + "\"" : "null") + ")");
_sourceName = fn;
}
/***************************************************************************
* Close the input stream.
*
*
* It is typically not necessary to invoke this method since the input * stream is usually closed automatically once parsing has completed, unless * it is necessary to close the input stream deliberately or prematurely. * *
* Note that this method does not throw any exceptions. * * @since 1.1, 2001-08-11 */ public void close() //implements tribble.io.Lr1ParserI { // Close the input stream if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] close()"); if (_lexer != null) { try { _lexer.close(); } catch (IOException ex) { // Ignore } _lexer = null; } } /*************************************************************************** * Parse the token input stream, recognizing valid sentences of an LR(1) * grammar.
... * * @return * True if the parse was successful (i.e., the sequence of tokens read from * the input stream constitutes a valid sentence of the grammar), otherwise * false. * * @throws Exception * Thrown if an I/O error or parsing error occurs. * * @since 1.1, 2001-08-11 */ public boolean parse() throws Exception //implements tribble.io.Lr1ParserI { if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] parse()..."); // Check that there is an open input stream lexer if (_lexer == null) { if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Lexer is null"); return (false); } // Initialize the parser // _nStack[] was initialized by the derived subclass if (_nStack == null || _nStack.length == 0) throw new Exception("Null/empty parser nonterminal stack"); if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Push state 0"); _sStack = new short[_nStack.length]; _sStack[0] = 0; // $accept state _tStack = new TokenI[_nStack.length]; _nStack[0] = null; _sp = 0; _r = null; _tokCode = _T_EMPTY; _tokLine = 0; _state = 0; ///Optimize by making _sStack[0] implicit only return (_resumeParse()); } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Protected methods /*************************************************************************** * Parse the token input stream, recognizing valid sentences of an LR(1) * grammar. * * @return * True if the parse was successful (i.e., the sequence of tokens read from * the input stream constitutes a valid sentence of the grammar), otherwise * false. * * @throws Exception * Thrown if an I/O error or parsing error occurs. * * @since 1.1, 2002-10-07 */ protected boolean _resumeParse() throws Exception { ///Optimize by making _sStack[0] (which contains state #0) implicit only // Begin parsing input tokens for (;;) { int act; int len; // Ensure that there is a lookahead token if (_tokCode == _T_EMPTY) { // Read the next token ///add debugs _getToken(); // Push the token onto the token stack ///...check for stack overflow; _tStack[_sp++] = _tok; } // Look up the next action based on the current state act = _ACTION_LOOKUP[_state*2]; len = _ACTION_LOOKUP[_state*2+1]; if (act < 0) { // Reduction action act = len; if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Reduce r" + act); } else { // Shift action, // look up the next action based on the lookahead symbol for (int i = act; len > 0; i++, len--) { act = _ACTION[i*2+1]; if (_ACTION[i*2] == _tokCode) break; if (_ACTION[i*2] == _T_ANY) break; act = _A_ERROR; } if (act == _A_ERROR) { int ln; // Unexpected input token found, // push an 'error' token in its place ln = _tok.getLineNumber(); if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Push error, line " + ln); _tok = new TokenAdapter(_T_ERROR, "$error", ln); /*+++INCOMPLETE // Look for a transition on terminal 'error' -?? ...act = ?>0; -?? +++*/ } } // Perform the action if (act > 0) { // Shift action, push a terminal symbol onto the stack if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Shift state " + act); ///Check for stack overflow _sStack[_sp] = (short) act; _tStack[_sp] = _tok; _sp++; _state = act; // A new token needs to be read _tokCode = _T_EMPTY; } else if (act == _A_ERROR) { ///add debugs // Error transition, begin recovery ///... ///... // Could not recover, the parse failed return (false); } else if (act < 0) { int arg1i; int expSt; ///add debugs // Reduction action act = -act; if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Reduce r" + act); // Set the default LHS result to the leftmost RHS nonterminal arg1i = _sp - _RHS_LEN[act] + 1; _r = null; if (_RHS_LEN[act] > 0) { if (_nStack[arg1i] != null) _r = _nStack[arg1i]; else if (_tStack[arg1i] != null) ;///_r = ...convert TokenI to %stacktype(_tStack[arg1i]) } // Execute a reduction action code block, if any _action(act); // Pop the RHS symbols off the stack for (int i = _RHS_LEN[act]; i-- > 0; _sp--) { _tStack[_sp] = null; _nStack[_sp] = null; } // Transition to the next goto() state from the exposed state expSt = 0; if (_sp >= 0) expSt = _sStack[_sp]; if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Uncover state " + expSt); ;///_state = _GOTO...[expSt]; _sp++; _nStack[_sp] = _r; // No cast needed _r = null; } else if (act == 0 /*_A_ACCEPT*/) { ///add debugs // Accept ($accept) action if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Accept"); // Rule 0: '$accept : goal . $end' _r = _nStack[--_sp]; _sStack = null; _tStack = null; _nStack = null; // The parse was successful return (true); } ///... } } /*************************************************************************** * Read the next token from the lexer input stream. * * @return * A token code. * Also sets {@link #_tokCode} to the same token code, and sets * {@link #_tok} to the token object just read. * * @since 1.1, 2001-08-28 */ protected int _getToken() throws IOException { // Read the next token from the lexer input stream _tokCode = _T_END; _tok = _lexer.getToken(); if (_tok == null) { if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Read EOF"); return (_T_END); } // Get the token code and line number _tokLine = _tok.getLineNumber(); _tokCode = _tok.getType(); if (_tokCode <= 0) _tokCode = _T_END; if (_debug && _out != null) _out.printMessage(_sourceName, _tokLine, "[yaccm] Read token (" + _tokCode + ") " + _tok.getText()); return (_tokCode); } /*************************************************************************** * Perform a specific user-defined action for a given grammar rule * (production). * * @param n * The rule (production) number of the associated code block. * * @throws Exception * Thrown if an error (i.e., a semantic error) occurs. * * @since 1.2, 2002-10-29 */ protected abstract void _action(int n) throws Exception; /*************************************************************************** * Finalization. * Closes and disassociates the lexer input stream and the diagnostic output * stream from this parser. * * @since 1.6, 2003-02-24 */ protected synchronized void finalize() throws Throwable { try { // Close the input stream if (_lexer != null) _lexer.close(); } catch (Exception ex) { // Ignore } // Note: Do not close the diagnostic output stream '_out' // Disassociate the input and output streams from this lexer _lexer = null; _out = null; // Cascade super.finalize(); } // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Private constructors /*************************************************************************** * Default constructor. * Do not use this constructor. * * @since 1.1, 2002-08-28 */ private Lr1Parser() { // Do nothing } } // End Lr1Parser.java