342 lines
13 KiB
JavaScript
342 lines
13 KiB
JavaScript
/* Parse function for JavaScript. Makes use of the tokenizer from
 * tokenizejavascript.js. Note that your parsers do not have to be
 * this complicated -- if you don't want to recognize local variables,
 * in many languages it is enough to just look for braces, semicolons,
 * parentheses, etc, and know when you are inside a string or comment.
 *
 * See manual.html for more info about the parser interface.
 */
|
|
|
|
var JSParser = Editor.Parser = (function() {
|
|
// Set of token types that are treated as atoms. The parser actions test
// membership with atomicTypes.hasOwnProperty(type).
var atomicTypes = {
  "atom": true,
  "number": true,
  "variable": true,
  "string": true,
  "regexp": true
};
|
|
// Record describing one lexical context (used for indentation).
//   indented - indentation at the start of the line that opened the scope
//   column   - column at which the scope was opened
//   type     - kind of scope: 'vardef', 'stat' (statement), 'form'
//              (special form), or a bracket character
//   align    - whether lines inside the scope line up with its opening
//              character; when null/undefined the property is left
//              absent so that '"align" in lexical' reads as "undecided"
//   prev     - parent scope, if any
//   info     - optional extra tag for the scope
function JSLexical(indented, column, type, align, prev, info) {
  this.indented = indented;
  this.column = column;
  this.type = type;

  // Only materialise 'align' once it is actually known.
  var alignKnown = !(align == null);
  if (alignKnown) {
    this.align = align;
  }

  this.prev = prev;
  this.info = info;
}
|
|
|
|
// Indentation rules. Given a lexical context, returns a function that
// maps the first characters of a line to the column that line should be
// indented to. Reads the global indentUnit.
function indentJS(lexical) {
  return function(firstChars) {
    var firstChar = firstChars && firstChars.charAt(0);
    var scopeType = lexical.type;
    // True when the line starts with the character stored as the scope type.
    var closing = firstChar == scopeType;

    // Continuation lines of a 'var' list line up after "var ".
    if (scopeType == "vardef") {
      return lexical.indented + 4;
    }
    // An opening brace after a special form stays at the form's level.
    if (scopeType == "form" && firstChar == "{") {
      return lexical.indented;
    }
    if (scopeType == "stat" || scopeType == "form") {
      return lexical.indented + indentUnit;
    }
    // Inside a switch body: case/default labels get one unit, their
    // bodies two.
    if (lexical.info == "switch" && !closing) {
      var isLabel = /^(?:case|default)\b/.test(firstChars);
      return lexical.indented + (isLabel ? indentUnit : 2 * indentUnit);
    }
    // Aligned scopes indent to the opening column; the closing
    // character sits one column to the left of that.
    if (lexical.align) {
      return lexical.column - (closing ? 1 : 0);
    }
    return lexical.indented + (closing ? 0 : indentUnit);
  };
}
|
|
|
|
// The parser-iterator-producing function itself.
|
|
function parseJS(input, basecolumn) {
|
|
// Token stream wrapped around the input.
var tokens = tokenizeJavaScript(input);

// Parser state. cc is a stack of actions still to be performed in order
// to finish the current statement (for instance: "a closing parenthesis
// and a semicolon are still owed"). The action at the END of the array
// runs first. The bottom entry is 'statements', which re-queues itself
// and so keeps consuming whole statements.
var cc = [statements];

// Chain of local variable scopes (the current one plus those above it);
// null while no scope has been pushed.
var context = null;

// Current lexical scope, used mostly for indentation. The root scope is
// of type "block", is not aligned, and sits one indentUnit to the left
// of the base column.
var lexical = new JSLexical((basecolumn || 0) - indentUnit, 0, "block", false);

// Current column, and the indentation found at the start of the current
// line. Both feed into newly created lexical scope objects.
var column = 0, indented = 0;

// Channel between the mark/cont/pass helpers below and the driver loop
// in 'next'.
var consume, marked;

// The iterator object handed back to the caller.
var parser = {next: next, copy: copy};
|
|
|
|
// Produce the next token from the stream, annotated by the parser.
// Newline tokens get an 'indentation' function; tokens consumed by a
// parser action may get their 'style' overridden.
function next(){
  // Lexical actions (those flagged with 'lex') only adjust the
  // 'lexical' variable. Run them first, or everything below would be
  // looking at the wrong lexical state.
  while (cc[cc.length - 1].lex) {
    var lexAction = cc.pop();
    lexAction();
  }

  var token = tokens.next();
  var isWhitespace = token.type == "whitespace";

  // Track the current column and the indentation of the current line.
  if (isWhitespace && column == 0) {
    indented = token.value.length;
  }
  column += token.value.length;

  if (token.content == "\n") {
    indented = column = 0;
    // A scope whose 'align' is still undecided at the end of the line
    // is an un-aligned scope.
    if (!("align" in lexical)) {
      lexical.align = false;
    }
    // Newline tokens carry the indentation function for the line that
    // follows them.
    token.indentation = indentJS(lexical);
  }

  // Whitespace and comments mean nothing to the grammar.
  if (isWhitespace || token.type == "comment") {
    return token;
  }

  // A meaningful token on the same line as an undecided scope makes
  // that scope an aligned one.
  if (!("align" in lexical)) {
    lexical.align = true;
  }

  // Keep running the topmost action until one of them consumes the
  // token, at which point it can be returned.
  for (;;) {
    consume = marked = false;
    var action = cc.pop();
    action(token.type, token.content);
    if (!consume) {
      continue;
    }

    if (marked) {
      // An action asked for a specific style for this token.
      token.style = marked;
    } else if (token.type == "variable" && inScope(token.content)) {
      // Variables found in a registered scope are styled as locals.
      token.style = "js-localvariable";
    }
    return token;
  }
}
|
|
|
|
// Snapshot the parser state. The stateful variables are captured in a
// closure, and the returned function restores them when it is called
// with a new input stream. The cc array is copied both when taking and
// when restoring the snapshot because it is constantly being modified;
// the context and lexical objects are shared by reference between runs.
function copy(){
  var savedContext = context;
  var savedLexical = lexical;
  var savedActions = cc.slice(0);
  var savedTokenState = tokens.state;

  return function copyParser(input){
    context = savedContext;
    lexical = savedLexical;
    // Fresh copy, so the snapshot can be restored more than once.
    cc = savedActions.slice(0);
    column = indented = 0;
    tokens = tokenizeJavaScript(input, savedTokenState);
    return parser;
  };
}
|
|
|
|
// Push every action in 'fs' (an array-like) onto the cc stack, last one
// first, so that fs[0] ends up on top and is the next action to run.
function push(fs){
  var i = fs.length;
  while (i-- > 0) {
    cc.push(fs[i]);
  }
}
|
|
// cont and pass are what action functions call to queue further
// actions. cont marks the current token as consumed; pass leaves it for
// the next action to look at.
function cont(){
  consume = true;
  push(arguments);
}
|
|
// Queue the given actions without consuming the current token, so the
// next action on the stack gets to see the same token.
function pass(){
  consume = false;
  push(arguments);
}
|
|
// Request a style override for the current token; 'next' applies it
// when the token is consumed.
function mark(style){
  marked = style;
}
|
|
|
|
// Enter a new variable scope, linked to the current one through 'prev'.
// Every scope starts out knowing 'this' and 'arguments'.
function pushcontext(){
  var scope = {prev: context, vars: {"this": true, "arguments": true}};
  context = scope;
}
|
|
// Leave the current variable scope and go back to its parent.
function popcontext(){
  var parent = context.prev;
  context = parent;
}
|
|
// Record 'varname' as defined in the current scope and style the token
// as a variable definition. Does nothing when no scope is active.
function register(varname){
  if (!context) {
    return;
  }
  mark("js-variabledef");
  context.vars[varname] = true;
}
|
|
// Check whether a variable is defined in the current scope or in any of
// the scopes above it. Returns true or false.
//
// Only names stored directly on a scope's 'vars' object count. A plain
// property read would also find members inherited from
// Object.prototype, so identifiers such as 'toString' or 'constructor'
// would be reported as local variables in every scope.
function inScope(varname){
  var hasOwn = Object.prototype.hasOwnProperty;
  var cursor = context;
  while (cursor) {
    if (hasOwn.call(cursor.vars, varname) && cursor.vars[varname])
      return true;
    cursor = cursor.prev;
  }
  return false;
}
|
|
|
|
// Build an action that, when run, enters a new lexical context of the
// given type (with an optional 'info' tag). The new context records the
// indentation and column current at the time the action runs, and has
// the context it replaces as its parent.
function pushlex(type, info) {
  var action = function(){
    lexical = new JSLexical(indented, column, type, null, lexical, info);
  };
  // Flag it so that 'next' runs it before fetching another token.
  action.lex = true;
  return action;
}
|
|
// Leave the current lexical context by stepping back to its parent.
function poplex(){
  var parent = lexical.prev;
  lexical = parent;
}
// The 'lex' flag tells the 'next' function that an action can (and has
// to) be run before moving on to the next token. Actions built by
// pushlex carry the same flag.
poplex.lex = true;
|
|
|
|
// Creates an action that discards tokens until it finds one of the
// given type.
//   wanted - the token type to wait for
// The returned action consumes every token it is shown. While the type
// does not match it queues itself again; once it matches it queues
// nothing, which lets the parser move on.
function expect(wanted){
  return function expecting(type){
    if (type == wanted) cont();
    // Refer to the action by its own name: arguments.callee throws a
    // TypeError in strict-mode code.
    else cont(expecting);
  };
}
|
|
|
|
// Queue one statement followed by this action again, without consuming
// the token, so that statements keep being parsed indefinitely.
function statements(type){
  pass(statement, statements);
}
|
|
// Dispatch on the type of the current token to the actions that parse
// the corresponding kind of statement.
function statement(type){
  // 'var' list, terminated by a semicolon.
  if (type == "var")
    return cont(pushlex("vardef"), vardef1, expect(";"), poplex);
  // Keyword followed by an expression and then a statement.
  if (type == "keyword a")
    return cont(pushlex("form"), expression, statement, poplex);
  // Keyword followed directly by a statement.
  if (type == "keyword b")
    return cont(pushlex("form"), statement, poplex);
  if (type == "{")
    return cont(pushlex("}"), block, poplex);
  if (type == "function")
    return cont(functiondef);
  // for ( <spec> ) <statement>
  if (type == "for")
    return cont(pushlex("form"), expect("("), pushlex(")"), forspec1, expect(")"), poplex, statement, poplex);
  // A leading name may turn out to be a label.
  if (type == "variable")
    return cont(pushlex("stat"), maybelabel);
  // The braces of a switch get a scope tagged "switch" so that
  // case/default labels can be indented specially.
  if (type == "switch")
    return cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"), block, poplex, poplex);
  if (type == "case")
    return cont(expression, expect(":"));
  if (type == "default")
    return cont(expect(":"));
  // The catch parameter lives in a variable scope of its own.
  if (type == "catch")
    return cont(pushlex("form"), pushcontext, expect("("), funarg, expect(")"), statement, poplex, popcontext);
  // Anything else: an expression statement ending in a semicolon. The
  // token is left in place for 'expression' to look at.
  pass(pushlex("stat"), expression, expect(";"), poplex);
}
|
|
// Dispatch on the token type at the start of an expression. A token
// that cannot start an expression is left alone (nothing is queued and
// it is not consumed), so the next action on the stack sees it.
function expression(type){
  if (atomicTypes.hasOwnProperty(type))
    return cont(maybeoperator);
  if (type == "function")
    return cont(functiondef);
  // Prefix keyword or prefix operator: an expression follows.
  if (type == "keyword c" || type == "operator")
    return cont(expression);
  // Parenthesised expression.
  if (type == "(")
    return cont(pushlex(")"), expression, expect(")"), poplex, maybeoperator);
  // Array literal.
  if (type == "[")
    return cont(pushlex("]"), commasep(expression, "]"), poplex, maybeoperator);
  // Object literal.
  if (type == "{")
    return cont(pushlex("}"), commasep(objprop, "}"), poplex, maybeoperator);
}
|
|
// Used wherever an operator, a function call, a property access or a
// subscript may follow. If the token is none of those, nothing is
// queued and the token is passed on to the next action.
function maybeoperator(type){
  if (type == "operator")
    return cont(expression);
  // Call: argument list up to the closing parenthesis.
  if (type == "(")
    return cont(pushlex(")"), expression, commasep(expression, ")"), poplex, maybeoperator);
  // Property access.
  if (type == ".")
    return cont(property, maybeoperator);
  // Subscript.
  if (type == "[")
    return cont(pushlex("]"), expression, expect("]"), poplex, maybeoperator);
}
|
|
// Runs after a statement opened with a variable name. A colon means the
// name was a label: close the 'stat' scope and parse the labelled
// statement. Otherwise it is an ordinary expression statement, and the
// token is left for maybeoperator.
function maybelabel(type){
  if (type == ":")
    return cont(poplex, statement);
  pass(maybeoperator, expect(";"), poplex);
}
|
|
// Restyle a property name after a dot: the tokenizer reports it as a
// variable. Tokens of any other type are left untouched.
function property(type){
  if (type != "variable")
    return;
  mark("js-property");
  cont();
}
|
|
// Parse one 'name: value' entry of an object literal. Any atomic token
// is accepted as the name; a name tokenized as a variable is restyled
// as a property.
function objprop(type){
  if (type == "variable")
    mark("js-property");
  if (!atomicTypes.hasOwnProperty(type))
    return;
  cont(expect(":"), expression);
}
|
|
// Build an action that parses a comma-separated list of whatever the
// 'what' action recognizes, up to and including a token of type 'end'.
function commasep(what, end){
  // Runs after an element: a comma asks for another element, 'end'
  // closes the list, and anything else is skipped until 'end' shows up.
  function proceed(type) {
    if (type == ",")
      return cont(what, proceed);
    if (type == end)
      return cont();
    cont(expect(end));
  }
  return function commaSeparated(type) {
    // Empty list.
    if (type == end)
      return cont();
    pass(what, proceed);
  };
}
|
|
// Parse statements until a closing brace turns up; the brace itself is
// consumed.
function block(type){
  if (type == "}")
    return cont();
  pass(statement, block);
}
|
|
// Variable definitions take two actions: vardef1 looks for a name (or
// the end of the definition), vardef2 for an '=' sign or a comma.
// A name is registered in the current scope; any other token is simply
// consumed.
function vardef1(type, value){
  if (type != "variable")
    return cont();
  register(value);
  cont(vardef2);
}
|
|
// Second half of a variable definition: '=' introduces an initializer
// (after which this action runs again), a comma introduces the next
// name. Any other token is left for the next action.
function vardef2(type, value){
  if (value == "=")
    return cont(expression, vardef2);
  if (type == ",")
    return cont(vardef1);
}
|
|
// For loops: first action inside the parentheses of a 'for'.
function forspec1(type){
  // for (var ...
  if (type == "var")
    return cont(vardef1, forspec2);
  // A leading name may be the start of 'name in object'.
  if (type == "variable")
    return cont(formaybein);
  // A semicolon (empty initializer) or anything else is handed to
  // forspec2 unconsumed.
  pass(forspec2);
}
|
|
// Runs after a name at the start of a for spec: 'in' makes it a for-in
// loop, anything else continues as a regular for spec. The token is
// consumed either way.
function formaybein(type, value){
  if (value == "in")
    return cont(expression);
  cont(maybeoperator, forspec2);
}
|
|
// Runs after the first part of a for spec. A semicolon moves on to
// forspec3, 'in' is followed by the object expression, and any other
// token is consumed before an expression, a semicolon and forspec3 are
// queued.
function forspec2(type, value){
  if (type == ";")
    return cont(forspec3);
  if (value == "in")
    return cont(expression);
  cont(expression, expect(";"), forspec3);
}
|
|
// Last part of a for spec. A closing parenthesis is left unconsumed for
// the enclosing expect(")"); any other token is consumed and an
// expression is queued.
function forspec3(type) {
  if (type == ")")
    return pass();
  cont(expression);
}
|
|
// A function definition opens a new variable scope, and the names in
// its argument list are added to that scope. An optional function name
// before the parenthesis is registered in the scope that is current at
// that point.
function functiondef(type, value){
  if (type == "variable") {
    register(value);
    return cont(functiondef);
  }
  if (type == "(")
    return cont(pushcontext, commasep(funarg, ")"), statement, popcontext);
}
|
|
// One function (or catch) parameter: a variable name is registered in
// the current scope and consumed; other tokens are left alone.
function funarg(type, value){
  if (type != "variable")
    return;
  register(value);
  cont();
}
|
|
|
|
return parser;
|
|
}
|
|
|
|
return {make: parseJS, electricChars: "{}:"};
|
|
})();
|