/* * Copyright (C) 2009 Google Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Generate js file as follows: // // re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \ // | sed 's|^yy\([^:]*\)*\:|case \1:|' \ // | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ // | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ // | sed 's|[*]cursor|this._charAt(cursor)|' \ // | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ // | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ // | sed 's|unsigned\ int|var|' \ // | sed 's|var\ yych|case 1: var yych|' WebInspector.SourceHTMLTokenizer = function() { WebInspector.SourceTokenizer.call(this); // The order is determined by the generated code. this._lexConditions = { INITIAL: 0, COMMENT: 1, DOCTYPE: 2, TAG: 3, DSTRING: 4, SSTRING: 5 }; this.case_INITIAL = 1000; this.case_COMMENT = 1001; this.case_DOCTYPE = 1002; this.case_TAG = 1003; this.case_DSTRING = 1004; this.case_SSTRING = 1005; this._parseConditions = { INITIAL: 0, ATTRIBUTE: 1, ATTRIBUTE_VALUE: 2, LINKIFY: 4, A_NODE: 8, SCRIPT: 16, STYLE: 32 }; this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL }; this.condition = this.initialCondition; } WebInspector.SourceHTMLTokenizer.prototype = { set line(line) { if (this._internalJavaScriptTokenizer) { var match = /<\/script/i.exec(line); if (match) { this._internalJavaScriptTokenizer.line = line.substring(0, match.index); } else this._internalJavaScriptTokenizer.line = line; } else if (this._internalCSSTokenizer) { var match = /<\/style/i.exec(line); if (match) { this._internalCSSTokenizer.line = line.substring(0, match.index); } else this._internalCSSTokenizer.line = line; } this._line = line; }, _isExpectingAttribute: function() { return this._condition.parseCondition & this._parseConditions.ATTRIBUTE; }, _isExpectingAttributeValue: function() { return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE; }, _setExpectingAttribute: function() { if (this._isExpectingAttributeValue()) this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; this._condition.parseCondition |= this._parseConditions.ATTRIBUTE; }, _setExpectingAttributeValue: function() { if (this._isExpectingAttribute()) this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE; this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; }, _stringToken: function(cursor, stringEnds) { if (!this._isExpectingAttributeValue()) { this.tokenType = null; return cursor; } this.tokenType = this._attrValueTokenType(); if (stringEnds) this._setExpectingAttribute(); return cursor; }, _attrValueTokenType: function() { if (this._condition.parseCondition & this._parseConditions.LINKIFY) { if (this._condition.parseCondition & this._parseConditions.A_NODE) return "html-external-link"; return "html-resource-link"; } return "html-attribute-value"; }, nextToken: function(cursor) { if (this._internalJavaScriptTokenizer) { // Re-set line to force detection first. this.line = this._line; if (cursor !== this._internalJavaScriptTokenizer._line.length) { // Tokenizer is stateless, so restore its condition before tokenizing and save it after. this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition; var result = this._internalJavaScriptTokenizer.nextToken(cursor); this.tokenType = this._internalJavaScriptTokenizer.tokenType; this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition; return result; } else if (cursor !== this._line.length) delete this._internalJavaScriptTokenizer; } else if (this._internalCSSTokenizer) { // Re-set line to force detection first. this.line = this._line; if (cursor !== this._internalCSSTokenizer._line.length) { // Tokenizer is stateless, so restore its condition before tokenizing and save it after. this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition; var result = this._internalCSSTokenizer.nextToken(cursor); this.tokenType = this._internalCSSTokenizer.tokenType; this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition; return result; } else if (cursor !== this._line.length) delete this._internalCSSTokenizer; } var cursorOnEnter = cursor; var gotoCase = 1; while (1) { switch (gotoCase) // Following comment is replaced with generated state machine. /*!re2c re2c:define:YYCTYPE = "var"; re2c:define:YYCURSOR = cursor; re2c:define:YYGETCONDITION = "this.getLexCondition"; re2c:define:YYSETCONDITION = "this.setLexCondition"; re2c:condprefix = "case this.case_"; re2c:condenumprefix = "this._lexConditions."; re2c:yyfill:enable = 0; re2c:labelprefix = "case "; re2c:indent:top = 2; re2c:indent:string = " "; CommentContent = ([^-\r\n] | ("--" [^>]))*; Comment = ""; CommentStart = ""; DocTypeStart = "]*; ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; ScriptEnd = ""; EqualSign = "="; DoubleStringContent = [^\r\n\"]*; SingleStringContent = [^\r\n\']*; StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; DoubleStringStart = "\"" DoubleStringContent [\r\n]; DoubleStringEnd = DoubleStringContent "\""; SingleStringStart = "'" SingleStringContent [\r\n]; SingleStringEnd = SingleStringContent "'"; Identifier = [^ \r\n"'<>\[\]=]+; Comment { this.tokenType = "html-comment"; return cursor; } CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } ScriptStart => TAG { if (this._condition.parseCondition & this._parseConditions.SCRIPT) { // Do not tokenize script tag contents, keep lexer state, even though processing "<". this.setLexCondition(this._lexConditions.INITIAL); this.tokenType = null; return cursor; } this.tokenType = "html-tag"; this._condition.parseCondition = this._parseConditions.SCRIPT; this._setExpectingAttribute(); return cursor; } ScriptEnd => TAG { this.tokenType = "html-tag"; this._condition.parseCondition = this._parseConditions.INITIAL; return cursor; } StyleStart => TAG { if (this._condition.parseCondition & this._parseConditions.STYLE) { // Do not tokenize style tag contents, keep lexer state, even though processing "<". this.setLexCondition(this._lexConditions.INITIAL); this.tokenType = null; return cursor; } this.tokenType = "html-tag"; this._condition.parseCondition = this._parseConditions.STYLE; this._setExpectingAttribute(); return cursor; } StyleEnd => TAG { this.tokenType = "html-tag"; this._condition.parseCondition = this._parseConditions.INITIAL; return cursor; } LT => TAG { if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) { // Do not tokenize script and style tag contents, keep lexer state, even though processing "<". this.setLexCondition(this._lexConditions.INITIAL); this.tokenType = null; return cursor; } this._condition.parseCondition = this._parseConditions.INITIAL; this.tokenType = "html-tag"; return cursor; } GT => INITIAL { this.tokenType = "html-tag"; if (this._condition.parseCondition & this._parseConditions.SCRIPT) { if (!this._internalJavaScriptTokenizer) { this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript"); this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition; } // Do not tokenize script tag contents. return cursor; } if (this._condition.parseCondition & this._parseConditions.STYLE) { if (!this._internalCSSTokenizer) { this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css"); this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition; } // Do not tokenize style tag contents. return cursor; } this._condition.parseCondition = this._parseConditions.INITIAL; return cursor; } StringLiteral { return this._stringToken(cursor, true); } DoubleStringStart => DSTRING { return this._stringToken(cursor); } DoubleStringContent => DSTRING { return this._stringToken(cursor); } DoubleStringEnd => TAG { return this._stringToken(cursor, true); } SingleStringStart => SSTRING { return this._stringToken(cursor); } SingleStringContent => SSTRING { return this._stringToken(cursor); } SingleStringEnd => TAG { return this._stringToken(cursor, true); } EqualSign => TAG { if (this._isExpectingAttribute()) this._setExpectingAttributeValue(); this.tokenType = null; return cursor; } Identifier { if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) { // Fall through if expecting attributes. this.tokenType = null; return cursor; } if (this._condition.parseCondition === this._parseConditions.INITIAL) { this.tokenType = "html-tag"; this._setExpectingAttribute(); var token = this._line.substring(cursorOnEnter, cursor); if (token === "a") this._condition.parseCondition |= this._parseConditions.A_NODE; else if (this._condition.parseCondition & this._parseConditions.A_NODE) this._condition.parseCondition ^= this._parseConditions.A_NODE; } else if (this._isExpectingAttribute()) { var token = this._line.substring(cursorOnEnter, cursor); if (token === "href" || token === "src") this._condition.parseCondition |= this._parseConditions.LINKIFY; else if (this._condition.parseCondition |= this._parseConditions.LINKIFY) this._condition.parseCondition ^= this._parseConditions.LINKIFY; this.tokenType = "html-attribute-name"; } else if (this._isExpectingAttributeValue()) this.tokenType = this._attrValueTokenType(); else this.tokenType = null; return cursor; } <*> [^] { this.tokenType = null; return cursor; } */ } } } WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;