1 /**
  2  * @fileoverview A recursive descent Parser.
  3  */
  4 
  5 goog.provide('xrx.xpath.Parser');
  6 
  7 goog.require('xrx.xpath.BinaryExpr');
  8 goog.require('xrx.xpath.FilterExpr');
  9 goog.require('xrx.xpath.FunctionCall');
 10 goog.require('xrx.xpath.KindTest');
 11 goog.require('xrx.xpath.Literal');
 12 goog.require('xrx.xpath.NameTest');
 13 goog.require('xrx.xpath.Number');
 14 goog.require('xrx.xpath.PathExpr');
 15 goog.require('xrx.xpath.Predicates');
 16 goog.require('xrx.xpath.Step');
 17 goog.require('xrx.xpath.UnaryExpr');
 18 goog.require('xrx.xpath.UnionExpr');
 19 
 20 
 21 
 22 /**
 23  * The recursive descent parser.
 24  *
 25  * @constructor
 26  * @param {!xrx.xpath.Lexer} lexer The lexer.
 27  * @param {function(string): ?string} nsResolver Namespace resolver.
 28  */
 29 xrx.xpath.Parser = function(lexer, nsResolver) {
 30 
 31   /**
 32    * @private {!xrx.xpath.Lexer}
 33    */
 34   this.lexer_ = lexer;
 35 
 36   /**
 37    * @private {function(string): ?string}
 38    */
 39   this.nsResolver_ = nsResolver;
 40 };
 41 
 42 
 43 /**
 44  * Apply recursive descent parsing on the input to construct an
 45  * abstract syntax tree.
 46  *
 47  * @return {!xrx.xpath.Expr} The root of the constructed tree.
 48  */
 49 xrx.xpath.Parser.prototype.parseExpr = function() {
 50   var expr, stack = [];
 51   while (true) {
 52     this.checkNotEmpty_('Missing right hand side of binary expression.');
 53     expr = this.parseUnaryExpr_(); // See if it's just a UnaryExpr.
 54     var opString = this.lexer_.next();
 55     if (!opString) {
 56       break; // Done, we have only a UnaryExpr.
 57     }
 58 
 59     var op = xrx.xpath.BinaryExpr.getOp(opString);
 60     var precedence = op && op.getPrecedence();
 61     if (!precedence) {
 62       this.lexer_.back();
 63       break;
 64     }
 65     // Precedence climbing
 66     while (stack.length &&
 67         precedence <= stack[stack.length - 1].getPrecedence()) {
 68       expr = new xrx.xpath.BinaryExpr(stack.pop(), stack.pop(), expr);
 69     }
 70     stack.push(expr, op);
 71   }
 72   while (stack.length) {
 73     expr = new xrx.xpath.BinaryExpr(stack.pop(), stack.pop(),
 74         /** @type {!xrx.xpath.Expr} */ (expr));
 75   }
 76   return /** @type {!xrx.xpath.Expr} */ (expr);
 77 };
 78 
 79 
 80 /**
 81  * Checks that the lexer is not empty,
 82  *     displays the given error message if it is.
 83  *
 84  * @private
 85  * @param {string} msg The error message to display.
 86  */
 87 xrx.xpath.Parser.prototype.checkNotEmpty_ = function(msg) {
 88   if (this.lexer_.empty()) {
 89     throw Error(msg);
 90   }
 91 };
 92 
 93 
 94 /**
 95  * Checks that the next token of the error message is the expected token.
 96  *
 97  * @private
 98  * @param {string} expected The expected token.
 99  */
100 xrx.xpath.Parser.prototype.checkNextEquals_ = function(expected) {
101   var got = this.lexer_.next();
102   if (got != expected) {
103     throw Error('Bad token, expected: ' + expected + ' got: ' + got);
104   }
105 };
106 
107 
108 /**
109  * Checks that the next token of the error message is not the given token.
110  *
111  * @private
112  * @param {string} token The token.
113  */
114 xrx.xpath.Parser.prototype.checkNextNotEquals_ = function(token) {
115   var next = this.lexer_.next();
116   if (next != token) {
117     throw Error('Bad token: ' + next);
118   }
119 };
120 
121 
122 /**
123  * Attempts to parse the input as a FilterExpr.
124  *
125  * @private
126  * @return {xrx.xpath.Expr} The root of the constructed tree.
127  */
128 xrx.xpath.Parser.prototype.parseFilterExpr_ = function() {
129   var expr;
130   var token = this.lexer_.peek();
131   var ch = token.charAt(0);
132   switch (ch) {
133     case '$':
134       throw Error('Variable reference not allowed in HTML XPath');
135     case '(':
136       this.lexer_.next();
137       expr = this.parseExpr();
138       this.checkNotEmpty_('unclosed "("');
139       this.checkNextEquals_(')');
140       break;
141     case '"':
142     case "'":
143       expr = this.parseLiteral_();
144       break;
145     default:
146       if (!isNaN(+token)) {
147         expr = this.parseNumber_();
148       } else if (xrx.xpath.KindTest.isValidType(token)) {
149         return null;
150       } else if (/(?![0-9])[\w]/.test(ch) && this.lexer_.peek(1) == '(') {
151         expr = this.parseFunctionCall_();
152       } else {
153         return null;
154       }
155   }
156   if (this.lexer_.peek() != '[') {
157     return expr;
158   }
159   var predicates = new xrx.xpath.Predicates(this.parsePredicates_());
160   return new xrx.xpath.FilterExpr(expr, predicates);
161 };
162 
163 
164 /**
165  * Parses FunctionCall.
166  *
167  * @private
168  * @return {!xrx.xpath.FunctionCall} The parsed expression.
169  */
170 xrx.xpath.Parser.prototype.parseFunctionCall_ = function() {
171   var funcName = this.lexer_.next();
172   var func = xrx.xpath.FunctionCall.getFunc(funcName);
173   this.lexer_.next();
174 
175   var args = [];
176   while (this.lexer_.peek() != ')') {
177     this.checkNotEmpty_('Missing function argument list.');
178     args.push(this.parseExpr());
179     if (this.lexer_.peek() != ',') {
180       break;
181     }
182     this.lexer_.next();
183   }
184   this.checkNotEmpty_('Unclosed function argument list.');
185   this.checkNextNotEquals_(')');
186 
187   return new xrx.xpath.FunctionCall(func, args);
188 };
189 
190 
191 /**
192  * Parses the input to construct a KindTest.
193  *
194  * @private
195  * @return {!xrx.xpath.KindTest} The KindTest constructed.
196  */
197 xrx.xpath.Parser.prototype.parseKindTest_ = function() {
198   var typeName = this.lexer_.next();
199   if (!xrx.xpath.KindTest.isValidType(typeName)) {
200     throw Error('Invalid type name: ' + typeName);
201   }
202   this.checkNextEquals_('(');
203   this.checkNotEmpty_('Bad nodetype');
204   var ch = this.lexer_.peek().charAt(0);
205 
206   var literal = null;
207   if (ch == '"' || ch == "'") {
208     literal = this.parseLiteral_();
209   }
210   this.checkNotEmpty_('Bad nodetype');
211   this.checkNextNotEquals_(')');
212   return new xrx.xpath.KindTest(typeName, literal);
213 };
214 
215 
216 /**
217  * Parses the input to construct a Literal.
218  *
219  * @private
220  * @return {!xrx.xpath.Literal} The Literal constructed.
221  */
222 xrx.xpath.Parser.prototype.parseLiteral_ = function() {
223   var token = this.lexer_.next();
224   if (token.length < 2) {
225     throw Error('Unclosed literal string');
226   }
227   return new xrx.xpath.Literal(token);
228 };
229 
230 
231 /**
232  * Parses the input to construct a NameTest.
233  *
234  * @private
235  * @return {!xrx.xpath.NameTest} The NameTest constructed.
236  */
237 xrx.xpath.Parser.prototype.parseNameTest_ = function() {
238   var name = this.lexer_.next();
239 
240   // Check whether there's a namespace prefix.
241   var colonIndex = name.indexOf(':');
242   if (colonIndex == -1) {
243     return new xrx.xpath.NameTest(name);
244   } else {
245     var namespacePrefix = name.substring(0, colonIndex);
246     var namespaceUri = this.nsResolver_(namespacePrefix);
247     if (!namespaceUri) {
248       throw Error('Namespace prefix not declared: ' + namespacePrefix);
249     }
250     name = name.substr(colonIndex + 1);
251     return new xrx.xpath.NameTest(name, namespaceUri);
252   }
253 };
254 
255 
256 /**
257  * Parses the input to construct a Number.
258  *
259  * @private
260  * @return {!xrx.xpath.Number} The Number constructed.
261  */
262 xrx.xpath.Parser.prototype.parseNumber_ = function() {
263   return new xrx.xpath.Number(+this.lexer_.next());
264 };
265 
266 
267 /**
268  * Attempts to parse the input as a PathExpr.
269  *
270  * @private
271  * @return {!xrx.xpath.Expr} The root of the constructed tree.
272  */
273 xrx.xpath.Parser.prototype.parsePathExpr_ = function() {
274   var op, expr;
275   var steps = [];
276   var filterExpr;
277   if (xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) {
278     op = this.lexer_.next();
279     var token = this.lexer_.peek();
280     if (op == '/' && (this.lexer_.empty() ||
281         (token != '.' && token != '..' && token != '@' && token != '*' &&
282         !/(?![0-9])[\w]/.test(token)))) {
283       return new xrx.xpath.PathExpr.RootHelperExpr();
284     }
285     filterExpr = new xrx.xpath.PathExpr.RootHelperExpr();
286 
287     this.checkNotEmpty_('Missing next location step.');
288     expr = this.parseStep_(op);
289     steps.push(expr);
290   } else {
291     expr = this.parseFilterExpr_();
292     if (!expr) {
293       expr = this.parseStep_('/');
294       filterExpr = new xrx.xpath.PathExpr.ContextHelperExpr();
295       steps.push(expr);
296     } else if (!xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) {
297       return expr; // Done.
298     } else {
299       filterExpr = expr;
300     }
301   }
302   while (true) {
303     if (!xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) {
304       break;
305     }
306     op = this.lexer_.next();
307     this.checkNotEmpty_('Missing next location step.');
308     expr = this.parseStep_(op);
309     steps.push(expr);
310   }
311   return new xrx.xpath.PathExpr(filterExpr, steps);
312 };
313 
314 
315 /**
316  * Parses Step.
317  *
318  * @private
319  * @param {string} op The op for this step.
320  * @return {!xrx.xpath.Step} The parsed expression.
321  */
322 xrx.xpath.Parser.prototype.parseStep_ = function(op) {
323   var test, step, token, predicates;
324   if (op != '/' && op != '//') {
325     throw Error('Step op should be "/" or "//"');
326   }
327   if (this.lexer_.peek() == '.') {
328     step = new xrx.xpath.Step(xrx.xpath.Step.Axis.SELF,
329         new xrx.xpath.KindTest('node'));
330     this.lexer_.next();
331     return step;
332   }
333   else if (this.lexer_.peek() == '..') {
334     step = new xrx.xpath.Step(xrx.xpath.Step.Axis.PARENT,
335         new xrx.xpath.KindTest('node'));
336     this.lexer_.next();
337     return step;
338   } else {
339     // Grab the axis.
340     var axis;
341     if (this.lexer_.peek() == '@') {
342       axis = xrx.xpath.Step.Axis.ATTRIBUTE;
343       this.lexer_.next();
344       this.checkNotEmpty_('Missing attribute name');
345     } else {
346       if (this.lexer_.peek(1) == '::') {
347         if (!/(?![0-9])[\w]/.test(this.lexer_.peek().charAt(0))) {
348           throw Error('Bad token: ' + this.lexer_.next());
349         }
350         var axisName = this.lexer_.next();
351         axis = xrx.xpath.Step.getAxis(axisName);
352         if (!axis) {
353           throw Error('No axis with name: ' + axisName);
354         }
355         this.lexer_.next();
356         this.checkNotEmpty_('Missing node name');
357       } else {
358         axis = xrx.xpath.Step.Axis.CHILD;
359       }
360     }
361 
362     // Grab the test.
363     token = this.lexer_.peek();
364     if (!/(?![0-9])[\w]/.test(token.charAt(0))) {
365       if (token == '*') {
366         test = this.parseNameTest_();
367       } else {
368         throw Error('Bad token: ' + this.lexer_.next());
369       }
370     } else {
371       if (this.lexer_.peek(1) == '(') {
372         if (!xrx.xpath.KindTest.isValidType(token)) {
373           throw Error('Invalid node type: ' + token);
374         }
375         test = this.parseKindTest_();
376       } else {
377         test = this.parseNameTest_();
378       }
379     }
380     predicates = new xrx.xpath.Predicates(this.parsePredicates_(),
381         axis.isReverse());
382     return step || new xrx.xpath.Step(axis, test, predicates, op == '//');
383   }
384 };
385 
386 
387 /**
388  * Parses and returns the predicates from the this.lexer_.
389  *
390  * @private
391  * @return {!Array.<!xrx.xpath.Expr>} An array of the predicates.
392  */
393 xrx.xpath.Parser.prototype.parsePredicates_ = function() {
394   var predicates = [];
395   while (this.lexer_.peek() == '[') {
396     this.lexer_.next();
397     this.checkNotEmpty_('Missing predicate expression.');
398     var predicate = this.parseExpr();
399     predicates.push(predicate);
400     this.checkNotEmpty_('Unclosed predicate expression.');
401     this.checkNextEquals_(']');
402   }
403   return predicates;
404 };
405 
406 
407 /**
408  * Attempts to parse the input as a unary expression with
409  * recursive descent parsing.
410  *
411  * @private
412  * @return {!xrx.xpath.Expr} The root of the constructed tree.
413  */
414 xrx.xpath.Parser.prototype.parseUnaryExpr_ = function() {
415   if (this.lexer_.peek() == '-') {
416     this.lexer_.next();
417     return new xrx.xpath.UnaryExpr(this.parseUnaryExpr_());
418   } else {
419     return this.parseUnionExpr_();
420   }
421 };
422 
423 
424 /**
425  * Attempts to parse the input as a union expression with
426  * recursive descent parsing.
427  *
428  * @private
429  * @return {!xrx.xpath.Expr} The root of the constructed tree.
430  */
431 xrx.xpath.Parser.prototype.parseUnionExpr_ = function() {
432   var expr = this.parsePathExpr_();
433   if (!(this.lexer_.peek() == '|')) {
434     return expr;  // Not a UnionExpr, returning as is.
435   }
436   var paths = [expr];
437   while (this.lexer_.next() == '|') {
438     this.checkNotEmpty_('Missing next union location path.');
439     paths.push(this.parsePathExpr_());
440   }
441   this.lexer_.back();
442   return new xrx.xpath.UnionExpr(paths);
443 };
444