/**
 * @fileoverview The lexer class for tokenizing xpath expressions.
 */

goog.provide('xrx.xpath.Lexer');



/**
 * Constructs a lexer, i.e. a cursor over an already tokenized expression.
 *
 * @param {!Array.<string>} tokens Tokens to iterate over.
 * @constructor
 */
xrx.xpath.Lexer = function(tokens) {
  /**
   * The tokens this lexer iterates over.
   *
   * @type {!Array.<string>}
   * @private
   */
  this.tokens_ = tokens;

  /**
   * Position of the token that the next call to next() will return.
   *
   * @type {number}
   * @private
   */
  this.index_ = 0;
};


/**
 * Tokenizes a source string into an array of tokens and wraps the result in
 * a lexer. Tokens that start with whitespace are dropped.
 *
 * @param {string} source Source string to tokenize.
 * @return {!xrx.xpath.Lexer} Essentially an iterator over the tokens.
 */
xrx.xpath.Lexer.tokenize = function(source) {
  // String.prototype.match returns null (not an empty array) when a global
  // regex finds nothing, which is the case for the empty string. Fall back to
  // an empty list so that an empty source yields an empty lexer instead of a
  // TypeError.
  var matches = source.match(xrx.xpath.Lexer.TOKEN_) || [];

  // Collect the tokens to keep in a fresh array rather than splicing the
  // array being iterated, which would skip the element following each
  // removed one.
  var tokens = [];
  for (var i = 0; i < matches.length; i++) {
    if (!xrx.xpath.Lexer.LEADING_WHITESPACE_.test(matches[i])) {
      tokens.push(matches[i]);
    }
  }
  return new xrx.xpath.Lexer(tokens);
};


/**
 * Regular expression to match XPath productions. The alternatives are tried
 * in the order listed, so multi-character tokens win over the trailing
 * single-character fallback.
 *
 * @const
 * @type {!RegExp}
 * @private
 */
xrx.xpath.Lexer.TOKEN_ = new RegExp(
    '\\$?(?:(?![0-9-])[\\w-]+:)?(?![0-9-])[\\w-]+' +
        // Nodename (possibly with namespace) or variable.
    '|\\/\\/' + // Double slash.
    '|\\.\\.' + // Double dot.
    '|::' + // Double colon.
    '|\\d+(?:\\.\\d*)?' + // Number starting with digit.
    '|\\.\\d+' + // Number starting with decimal point.
    '|"[^"]*"' + // Double quoted string.
    '|\'[^\']*\'' + // Single quoted string.
    '|[!<>]=' + // Operators
    '|\\s+' + // Whitespaces.
    '|.', // Any single character.
    'g');


/**
 * Regex to check if a string starts with a whitespace character.
 *
 * @const
 * @type {!RegExp}
 * @private
 */
xrx.xpath.Lexer.LEADING_WHITESPACE_ = /^\s/;


/**
 * Peeks at the lexer without advancing it. An optional offset can be used to
 * specify which token to peek at, relative to the current position.
 *
 * @param {number=} opt_i Index to peek at. Defaults to zero.
 * @return {string} Token peeked. Undefined if the requested position lies
 *     outside the token list.
 */
xrx.xpath.Lexer.prototype.peek = function(opt_i) {
  return this.tokens_[this.index_ + (opt_i || 0)];
};


/**
 * Returns the next token from the lexer and increments the index.
 *
 * @return {string} The next token. Undefined if the current position lies
 *     outside the token list; the index is incremented regardless.
 */
xrx.xpath.Lexer.prototype.next = function() {
  return this.tokens_[this.index_++];
};


/**
 * Decrements the index by one. No bounds check is performed.
 */
xrx.xpath.Lexer.prototype.back = function() {
  this.index_--;
};


/**
 * Checks whether the lexer is empty, i.e. whether the index has reached or
 * passed the end of the token list.
 *
 * @return {boolean} Whether the lexer is empty.
 */
xrx.xpath.Lexer.prototype.empty = function() {
  return this.tokens_.length <= this.index_;
};