1 /**
  2  * @fileoverview The lexer class for tokenizing xpath expressions.
  3  */
  4 
  5 goog.provide('xrx.xpath.Lexer');
  6 
  7 
  8 
  9 /**
 10  * Constructs a lexer.
 11  *
 12  * @param {!Array.<string>} tokens Tokens to iterate over.
 13  * @constructor
 14  */
 15 xrx.xpath.Lexer = function(tokens) {
 16   /**
 17    * @type {!Array.<string>}
 18    * @private
 19    */
 20   this.tokens_ = tokens;
 21 
 22   /**
 23    * @type {number}
 24    * @private
 25    */
 26   this.index_ = 0;
 27 };
 28 
 29 
 30 /**
 31  * Tokenizes a source string into an array of tokens.
 32  *
 33  * @param {string} source Source string to tokenize.
 34  * @return {!xrx.xpath.Lexer} Essentially an iterator over the tokens.
 35  */
 36 xrx.xpath.Lexer.tokenize = function(source) {
 37   var tokens = source.match(xrx.xpath.Lexer.TOKEN_);
 38 
 39   // Removes tokens starting with whitespace from the array.
 40   for (var i = 0; i < tokens.length; i++) {
 41     if (xrx.xpath.Lexer.LEADING_WHITESPACE_.test(tokens[i])) {
 42       tokens.splice(i, 1);
 43     }
 44   }
 45   return new xrx.xpath.Lexer(tokens);
 46 };
 47 
 48 
 49 /**
 50  * Regular expressions to match XPath productions.
 51  *
 52  * @const
 53  * @type {!RegExp}
 54  * @private
 55  */
 56 xrx.xpath.Lexer.TOKEN_ = new RegExp(
 57     '\\$?(?:(?![0-9-])[\\w-]+:)?(?![0-9-])[\\w-]+' +
 58         // Nodename (possibly with namespace) or variable.
 59     '|\\/\\/' + // Double slash.
 60     '|\\.\\.' + // Double dot.
 61     '|::' + // Double colon.
 62     '|\\d+(?:\\.\\d*)?' + // Number starting with digit.
 63     '|\\.\\d+' + // Number starting with decimal point.
 64     '|"[^"]*"' + // Double quoted string.
 65     '|\'[^\']*\'' + // Single quoted string.
 66     '|[!<>]=' + // Operators
 67     '|\\s+' + // Whitespaces.
 68     '|.', // Any single character.
 69     'g');
 70 
 71 
 72 /**
 73  * Regex to check if a string starts with a whitespace character.
 74  *
 75  * @const
 76  * @type {!RegExp}
 77  * @private
 78  */
 79 xrx.xpath.Lexer.LEADING_WHITESPACE_ = /^\s/;
 80 
 81 
 82 /**
 83  * Peeks at the lexer. An optional index can be
 84  * used to specify the token peek at.
 85  *
 86  * @param {number=} opt_i Index to peek at. Defaults to zero.
 87  * @return {string} Token peeked.
 88  */
 89 xrx.xpath.Lexer.prototype.peek = function(opt_i) {
 90   return this.tokens_[this.index_ + (opt_i || 0)];
 91 };
 92 
 93 
 94 /**
 95  * Returns the next token from the lexer and increments the index.
 96  *
 97  * @return {string} The next token.
 98  */
 99 xrx.xpath.Lexer.prototype.next = function() {
100   return this.tokens_[this.index_++];
101 };
102 
103 
/**
 * Steps the current position back by one token.
 */
xrx.xpath.Lexer.prototype.back = function() {
  this.index_ -= 1;
};
110 
111 
/**
 * Reports whether every token has been consumed, i.e. whether the current
 * position has reached or passed the end of the token array.
 *
 * @return {boolean} Whether the lexer is empty.
 */
xrx.xpath.Lexer.prototype.empty = function() {
  var tokenCount = this.tokens_.length;
  return this.index_ >= tokenCount;
};
120