1 /** 2 * @fileoverview A recursive descent Parser. 3 */ 4 5 goog.provide('xrx.xpath.Parser'); 6 7 goog.require('xrx.xpath.BinaryExpr'); 8 goog.require('xrx.xpath.FilterExpr'); 9 goog.require('xrx.xpath.FunctionCall'); 10 goog.require('xrx.xpath.KindTest'); 11 goog.require('xrx.xpath.Literal'); 12 goog.require('xrx.xpath.NameTest'); 13 goog.require('xrx.xpath.Number'); 14 goog.require('xrx.xpath.PathExpr'); 15 goog.require('xrx.xpath.Predicates'); 16 goog.require('xrx.xpath.Step'); 17 goog.require('xrx.xpath.UnaryExpr'); 18 goog.require('xrx.xpath.UnionExpr'); 19 20 21 22 /** 23 * The recursive descent parser. 24 * 25 * @constructor 26 * @param {!xrx.xpath.Lexer} lexer The lexer. 27 * @param {function(string): ?string} nsResolver Namespace resolver. 28 */ 29 xrx.xpath.Parser = function(lexer, nsResolver) { 30 31 /** 32 * @private {!xrx.xpath.Lexer} 33 */ 34 this.lexer_ = lexer; 35 36 /** 37 * @private {function(string): ?string} 38 */ 39 this.nsResolver_ = nsResolver; 40 }; 41 42 43 /** 44 * Apply recursive descent parsing on the input to construct an 45 * abstract syntax tree. 46 * 47 * @return {!xrx.xpath.Expr} The root of the constructed tree. 48 */ 49 xrx.xpath.Parser.prototype.parseExpr = function() { 50 var expr, stack = []; 51 while (true) { 52 this.checkNotEmpty_('Missing right hand side of binary expression.'); 53 expr = this.parseUnaryExpr_(); // See if it's just a UnaryExpr. 54 var opString = this.lexer_.next(); 55 if (!opString) { 56 break; // Done, we have only a UnaryExpr. 57 } 58 59 var op = xrx.xpath.BinaryExpr.getOp(opString); 60 var precedence = op && op.getPrecedence(); 61 if (!precedence) { 62 this.lexer_.back(); 63 break; 64 } 65 // Precedence climbing 66 while (stack.length && 67 precedence <= stack[stack.length - 1].getPrecedence()) { 68 expr = new xrx.xpath.BinaryExpr(stack.pop(), stack.pop(), expr); 69 } 70 stack.push(expr, op); 71 } 72 while (stack.length) { 73 expr = new xrx.xpath.BinaryExpr(stack.pop(), stack.pop(), 74 /** @type {!xrx.xpath.Expr} */ (expr)); 75 } 76 return /** @type {!xrx.xpath.Expr} */ (expr); 77 }; 78 79 80 /** 81 * Checks that the lexer is not empty, 82 * displays the given error message if it is. 83 * 84 * @private 85 * @param {string} msg The error message to display. 86 */ 87 xrx.xpath.Parser.prototype.checkNotEmpty_ = function(msg) { 88 if (this.lexer_.empty()) { 89 throw Error(msg); 90 } 91 }; 92 93 94 /** 95 * Checks that the next token of the error message is the expected token. 96 * 97 * @private 98 * @param {string} expected The expected token. 99 */ 100 xrx.xpath.Parser.prototype.checkNextEquals_ = function(expected) { 101 var got = this.lexer_.next(); 102 if (got != expected) { 103 throw Error('Bad token, expected: ' + expected + ' got: ' + got); 104 } 105 }; 106 107 108 /** 109 * Checks that the next token of the error message is not the given token. 110 * 111 * @private 112 * @param {string} token The token. 113 */ 114 xrx.xpath.Parser.prototype.checkNextNotEquals_ = function(token) { 115 var next = this.lexer_.next(); 116 if (next != token) { 117 throw Error('Bad token: ' + next); 118 } 119 }; 120 121 122 /** 123 * Attempts to parse the input as a FilterExpr. 124 * 125 * @private 126 * @return {xrx.xpath.Expr} The root of the constructed tree. 127 */ 128 xrx.xpath.Parser.prototype.parseFilterExpr_ = function() { 129 var expr; 130 var token = this.lexer_.peek(); 131 var ch = token.charAt(0); 132 switch (ch) { 133 case '$': 134 throw Error('Variable reference not allowed in HTML XPath'); 135 case '(': 136 this.lexer_.next(); 137 expr = this.parseExpr(); 138 this.checkNotEmpty_('unclosed "("'); 139 this.checkNextEquals_(')'); 140 break; 141 case '"': 142 case "'": 143 expr = this.parseLiteral_(); 144 break; 145 default: 146 if (!isNaN(+token)) { 147 expr = this.parseNumber_(); 148 } else if (xrx.xpath.KindTest.isValidType(token)) { 149 return null; 150 } else if (/(?![0-9])[\w]/.test(ch) && this.lexer_.peek(1) == '(') { 151 expr = this.parseFunctionCall_(); 152 } else { 153 return null; 154 } 155 } 156 if (this.lexer_.peek() != '[') { 157 return expr; 158 } 159 var predicates = new xrx.xpath.Predicates(this.parsePredicates_()); 160 return new xrx.xpath.FilterExpr(expr, predicates); 161 }; 162 163 164 /** 165 * Parses FunctionCall. 166 * 167 * @private 168 * @return {!xrx.xpath.FunctionCall} The parsed expression. 169 */ 170 xrx.xpath.Parser.prototype.parseFunctionCall_ = function() { 171 var funcName = this.lexer_.next(); 172 var func = xrx.xpath.FunctionCall.getFunc(funcName); 173 this.lexer_.next(); 174 175 var args = []; 176 while (this.lexer_.peek() != ')') { 177 this.checkNotEmpty_('Missing function argument list.'); 178 args.push(this.parseExpr()); 179 if (this.lexer_.peek() != ',') { 180 break; 181 } 182 this.lexer_.next(); 183 } 184 this.checkNotEmpty_('Unclosed function argument list.'); 185 this.checkNextNotEquals_(')'); 186 187 return new xrx.xpath.FunctionCall(func, args); 188 }; 189 190 191 /** 192 * Parses the input to construct a KindTest. 193 * 194 * @private 195 * @return {!xrx.xpath.KindTest} The KindTest constructed. 196 */ 197 xrx.xpath.Parser.prototype.parseKindTest_ = function() { 198 var typeName = this.lexer_.next(); 199 if (!xrx.xpath.KindTest.isValidType(typeName)) { 200 throw Error('Invalid type name: ' + typeName); 201 } 202 this.checkNextEquals_('('); 203 this.checkNotEmpty_('Bad nodetype'); 204 var ch = this.lexer_.peek().charAt(0); 205 206 var literal = null; 207 if (ch == '"' || ch == "'") { 208 literal = this.parseLiteral_(); 209 } 210 this.checkNotEmpty_('Bad nodetype'); 211 this.checkNextNotEquals_(')'); 212 return new xrx.xpath.KindTest(typeName, literal); 213 }; 214 215 216 /** 217 * Parses the input to construct a Literal. 218 * 219 * @private 220 * @return {!xrx.xpath.Literal} The Literal constructed. 221 */ 222 xrx.xpath.Parser.prototype.parseLiteral_ = function() { 223 var token = this.lexer_.next(); 224 if (token.length < 2) { 225 throw Error('Unclosed literal string'); 226 } 227 return new xrx.xpath.Literal(token); 228 }; 229 230 231 /** 232 * Parses the input to construct a NameTest. 233 * 234 * @private 235 * @return {!xrx.xpath.NameTest} The NameTest constructed. 236 */ 237 xrx.xpath.Parser.prototype.parseNameTest_ = function() { 238 var name = this.lexer_.next(); 239 240 // Check whether there's a namespace prefix. 241 var colonIndex = name.indexOf(':'); 242 if (colonIndex == -1) { 243 return new xrx.xpath.NameTest(name); 244 } else { 245 var namespacePrefix = name.substring(0, colonIndex); 246 var namespaceUri = this.nsResolver_(namespacePrefix); 247 if (!namespaceUri) { 248 throw Error('Namespace prefix not declared: ' + namespacePrefix); 249 } 250 name = name.substr(colonIndex + 1); 251 return new xrx.xpath.NameTest(name, namespaceUri); 252 } 253 }; 254 255 256 /** 257 * Parses the input to construct a Number. 258 * 259 * @private 260 * @return {!xrx.xpath.Number} The Number constructed. 261 */ 262 xrx.xpath.Parser.prototype.parseNumber_ = function() { 263 return new xrx.xpath.Number(+this.lexer_.next()); 264 }; 265 266 267 /** 268 * Attempts to parse the input as a PathExpr. 269 * 270 * @private 271 * @return {!xrx.xpath.Expr} The root of the constructed tree. 272 */ 273 xrx.xpath.Parser.prototype.parsePathExpr_ = function() { 274 var op, expr; 275 var steps = []; 276 var filterExpr; 277 if (xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) { 278 op = this.lexer_.next(); 279 var token = this.lexer_.peek(); 280 if (op == '/' && (this.lexer_.empty() || 281 (token != '.' && token != '..' && token != '@' && token != '*' && 282 !/(?![0-9])[\w]/.test(token)))) { 283 return new xrx.xpath.PathExpr.RootHelperExpr(); 284 } 285 filterExpr = new xrx.xpath.PathExpr.RootHelperExpr(); 286 287 this.checkNotEmpty_('Missing next location step.'); 288 expr = this.parseStep_(op); 289 steps.push(expr); 290 } else { 291 expr = this.parseFilterExpr_(); 292 if (!expr) { 293 expr = this.parseStep_('/'); 294 filterExpr = new xrx.xpath.PathExpr.ContextHelperExpr(); 295 steps.push(expr); 296 } else if (!xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) { 297 return expr; // Done. 298 } else { 299 filterExpr = expr; 300 } 301 } 302 while (true) { 303 if (!xrx.xpath.PathExpr.isValidOp(this.lexer_.peek())) { 304 break; 305 } 306 op = this.lexer_.next(); 307 this.checkNotEmpty_('Missing next location step.'); 308 expr = this.parseStep_(op); 309 steps.push(expr); 310 } 311 return new xrx.xpath.PathExpr(filterExpr, steps); 312 }; 313 314 315 /** 316 * Parses Step. 317 * 318 * @private 319 * @param {string} op The op for this step. 320 * @return {!xrx.xpath.Step} The parsed expression. 321 */ 322 xrx.xpath.Parser.prototype.parseStep_ = function(op) { 323 var test, step, token, predicates; 324 if (op != '/' && op != '//') { 325 throw Error('Step op should be "/" or "//"'); 326 } 327 if (this.lexer_.peek() == '.') { 328 step = new xrx.xpath.Step(xrx.xpath.Step.Axis.SELF, 329 new xrx.xpath.KindTest('node')); 330 this.lexer_.next(); 331 return step; 332 } 333 else if (this.lexer_.peek() == '..') { 334 step = new xrx.xpath.Step(xrx.xpath.Step.Axis.PARENT, 335 new xrx.xpath.KindTest('node')); 336 this.lexer_.next(); 337 return step; 338 } else { 339 // Grab the axis. 340 var axis; 341 if (this.lexer_.peek() == '@') { 342 axis = xrx.xpath.Step.Axis.ATTRIBUTE; 343 this.lexer_.next(); 344 this.checkNotEmpty_('Missing attribute name'); 345 } else { 346 if (this.lexer_.peek(1) == '::') { 347 if (!/(?![0-9])[\w]/.test(this.lexer_.peek().charAt(0))) { 348 throw Error('Bad token: ' + this.lexer_.next()); 349 } 350 var axisName = this.lexer_.next(); 351 axis = xrx.xpath.Step.getAxis(axisName); 352 if (!axis) { 353 throw Error('No axis with name: ' + axisName); 354 } 355 this.lexer_.next(); 356 this.checkNotEmpty_('Missing node name'); 357 } else { 358 axis = xrx.xpath.Step.Axis.CHILD; 359 } 360 } 361 362 // Grab the test. 363 token = this.lexer_.peek(); 364 if (!/(?![0-9])[\w]/.test(token.charAt(0))) { 365 if (token == '*') { 366 test = this.parseNameTest_(); 367 } else { 368 throw Error('Bad token: ' + this.lexer_.next()); 369 } 370 } else { 371 if (this.lexer_.peek(1) == '(') { 372 if (!xrx.xpath.KindTest.isValidType(token)) { 373 throw Error('Invalid node type: ' + token); 374 } 375 test = this.parseKindTest_(); 376 } else { 377 test = this.parseNameTest_(); 378 } 379 } 380 predicates = new xrx.xpath.Predicates(this.parsePredicates_(), 381 axis.isReverse()); 382 return step || new xrx.xpath.Step(axis, test, predicates, op == '//'); 383 } 384 }; 385 386 387 /** 388 * Parses and returns the predicates from the this.lexer_. 389 * 390 * @private 391 * @return {!Array.<!xrx.xpath.Expr>} An array of the predicates. 392 */ 393 xrx.xpath.Parser.prototype.parsePredicates_ = function() { 394 var predicates = []; 395 while (this.lexer_.peek() == '[') { 396 this.lexer_.next(); 397 this.checkNotEmpty_('Missing predicate expression.'); 398 var predicate = this.parseExpr(); 399 predicates.push(predicate); 400 this.checkNotEmpty_('Unclosed predicate expression.'); 401 this.checkNextEquals_(']'); 402 } 403 return predicates; 404 }; 405 406 407 /** 408 * Attempts to parse the input as a unary expression with 409 * recursive descent parsing. 410 * 411 * @private 412 * @return {!xrx.xpath.Expr} The root of the constructed tree. 413 */ 414 xrx.xpath.Parser.prototype.parseUnaryExpr_ = function() { 415 if (this.lexer_.peek() == '-') { 416 this.lexer_.next(); 417 return new xrx.xpath.UnaryExpr(this.parseUnaryExpr_()); 418 } else { 419 return this.parseUnionExpr_(); 420 } 421 }; 422 423 424 /** 425 * Attempts to parse the input as a union expression with 426 * recursive descent parsing. 427 * 428 * @private 429 * @return {!xrx.xpath.Expr} The root of the constructed tree. 430 */ 431 xrx.xpath.Parser.prototype.parseUnionExpr_ = function() { 432 var expr = this.parsePathExpr_(); 433 if (!(this.lexer_.peek() == '|')) { 434 return expr; // Not a UnionExpr, returning as is. 435 } 436 var paths = [expr]; 437 while (this.lexer_.next() == '|') { 438 this.checkNotEmpty_('Missing next union location path.'); 439 paths.push(this.parsePathExpr_()); 440 } 441 this.lexer_.back(); 442 return new xrx.xpath.UnionExpr(paths); 443 }; 444