356 lines
16 KiB
Plaintext
356 lines
16 KiB
Plaintext
/*
|
|
* Copyright (C) 2009 Google Inc. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are
|
|
* met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above
|
|
* copyright notice, this list of conditions and the following disclaimer
|
|
* in the documentation and/or other materials provided with the
|
|
* distribution.
|
|
* * Neither the name of Google Inc. nor the names of its
|
|
* contributors may be used to endorse or promote products derived from
|
|
* this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
// Generate js file as follows:
//
// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
// | sed 's|[*]cursor|this._charAt(cursor)|' \
// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
// | sed 's|unsigned\ int|var|' \
// | sed 's|var\ yych|case 1: var yych|'
|
|
|
|
// Constructs an HTML source tokenizer: runs the SourceTokenizer constructor on
// this object, then publishes the lexer/parser condition tables and the
// initial tokenizer condition.
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer conditions. The numbering is dictated by the generated code.
    var lex = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };
    this._lexConditions = lex;

    // Case labels for the lexer conditions (see "re2c:condprefix" in
    // nextToken): each one is the condition value offset by 1000.
    var caseBase = 1000;
    this.case_INITIAL = caseBase + lex.INITIAL;
    this.case_COMMENT = caseBase + lex.COMMENT;
    this.case_DOCTYPE = caseBase + lex.DOCTYPE;
    this.case_TAG = caseBase + lex.TAG;
    this.case_DSTRING = caseBase + lex.DSTRING;
    this.case_SSTRING = caseBase + lex.SSTRING;

    // Parser conditions are bit flags that are combined in
    // condition.parseCondition; INITIAL means "no flag set".
    var parse = {
        INITIAL: 0,
        ATTRIBUTE: 1 << 0,
        ATTRIBUTE_VALUE: 1 << 1,
        LINKIFY: 1 << 2,
        A_NODE: 1 << 3,
        SCRIPT: 1 << 4,
        STYLE: 1 << 5
    };
    this._parseConditions = parse;

    this.initialCondition = { lexCondition: lex.INITIAL, parseCondition: parse.INITIAL };
    this.condition = this.initialCondition;
};
|
|
|
|
WebInspector.SourceHTMLTokenizer.prototype = {
|
|
set line(line) {
|
|
if (this._internalJavaScriptTokenizer) {
|
|
var match = /<\/script/i.exec(line);
|
|
if (match) {
|
|
this._internalJavaScriptTokenizer.line = line.substring(0, match.index);
|
|
} else
|
|
this._internalJavaScriptTokenizer.line = line;
|
|
} else if (this._internalCSSTokenizer) {
|
|
var match = /<\/style/i.exec(line);
|
|
if (match) {
|
|
this._internalCSSTokenizer.line = line.substring(0, match.index);
|
|
} else
|
|
this._internalCSSTokenizer.line = line;
|
|
}
|
|
this._line = line;
|
|
},
|
|
|
|
_isExpectingAttribute: function()
|
|
{
|
|
return this._condition.parseCondition & this._parseConditions.ATTRIBUTE;
|
|
},
|
|
|
|
_isExpectingAttributeValue: function()
|
|
{
|
|
return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
|
|
},
|
|
|
|
_setExpectingAttribute: function()
|
|
{
|
|
if (this._isExpectingAttributeValue())
|
|
this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
|
|
this._condition.parseCondition |= this._parseConditions.ATTRIBUTE;
|
|
},
|
|
|
|
_setExpectingAttributeValue: function()
|
|
{
|
|
if (this._isExpectingAttribute())
|
|
this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE;
|
|
this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
|
|
},
|
|
|
|
_stringToken: function(cursor, stringEnds)
|
|
{
|
|
if (!this._isExpectingAttributeValue()) {
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
this.tokenType = this._attrValueTokenType();
|
|
if (stringEnds)
|
|
this._setExpectingAttribute();
|
|
return cursor;
|
|
},
|
|
|
|
_attrValueTokenType: function()
|
|
{
|
|
if (this._condition.parseCondition & this._parseConditions.LINKIFY) {
|
|
if (this._condition.parseCondition & this._parseConditions.A_NODE)
|
|
return "html-external-link";
|
|
return "html-resource-link";
|
|
}
|
|
return "html-attribute-value";
|
|
},
|
|
|
|
nextToken: function(cursor)
|
|
{
|
|
if (this._internalJavaScriptTokenizer) {
|
|
// Re-set line to force </script> detection first.
|
|
this.line = this._line;
|
|
if (cursor !== this._internalJavaScriptTokenizer._line.length) {
|
|
// Tokenizer is stateless, so restore its condition before tokenizing and save it after.
|
|
this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition;
|
|
var result = this._internalJavaScriptTokenizer.nextToken(cursor);
|
|
this.tokenType = this._internalJavaScriptTokenizer.tokenType;
|
|
this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition;
|
|
return result;
|
|
} else if (cursor !== this._line.length)
|
|
delete this._internalJavaScriptTokenizer;
|
|
} else if (this._internalCSSTokenizer) {
|
|
// Re-set line to force </style> detection first.
|
|
this.line = this._line;
|
|
if (cursor !== this._internalCSSTokenizer._line.length) {
|
|
// Tokenizer is stateless, so restore its condition before tokenizing and save it after.
|
|
this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition;
|
|
var result = this._internalCSSTokenizer.nextToken(cursor);
|
|
this.tokenType = this._internalCSSTokenizer.tokenType;
|
|
this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition;
|
|
return result;
|
|
} else if (cursor !== this._line.length)
|
|
delete this._internalCSSTokenizer;
|
|
}
|
|
|
|
var cursorOnEnter = cursor;
|
|
var gotoCase = 1;
|
|
while (1) {
|
|
switch (gotoCase)
|
|
// Following comment is replaced with generated state machine.
|
|
/*!re2c
|
|
re2c:define:YYCTYPE = "var";
|
|
re2c:define:YYCURSOR = cursor;
|
|
re2c:define:YYGETCONDITION = "this.getLexCondition";
|
|
re2c:define:YYSETCONDITION = "this.setLexCondition";
|
|
re2c:condprefix = "case this.case_";
|
|
re2c:condenumprefix = "this._lexConditions.";
|
|
re2c:yyfill:enable = 0;
|
|
re2c:labelprefix = "case ";
|
|
re2c:indent:top = 2;
|
|
re2c:indent:string = " ";
|
|
|
|
CommentContent = ([^-\r\n] | ("--" [^>]))*;
|
|
Comment = "<!--" CommentContent "-->";
|
|
CommentStart = "<!--" CommentContent [\r\n];
|
|
CommentEnd = CommentContent "-->";
|
|
|
|
DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
|
|
DocTypeContent = [^\r\n>]*;
|
|
|
|
ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
|
|
ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
|
|
|
|
StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee];
|
|
StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee];
|
|
|
|
LT = "<" | "</";
|
|
GT = ">";
|
|
EqualSign = "=";
|
|
|
|
DoubleStringContent = [^\r\n\"]*;
|
|
SingleStringContent = [^\r\n\']*;
|
|
StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
|
|
DoubleStringStart = "\"" DoubleStringContent [\r\n];
|
|
DoubleStringEnd = DoubleStringContent "\"";
|
|
SingleStringStart = "'" SingleStringContent [\r\n];
|
|
SingleStringEnd = SingleStringContent "'";
|
|
|
|
Identifier = [^ \r\n"'<>\[\]=]+;
|
|
|
|
<INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
|
|
<INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
|
|
<COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
|
|
<COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
|
|
|
|
<INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
|
|
<DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
|
|
<DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
|
|
|
|
<INITIAL> ScriptStart => TAG
|
|
{
|
|
if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
|
|
// Do not tokenize script tag contents, keep lexer state, even though processing "<".
|
|
this.setLexCondition(this._lexConditions.INITIAL);
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
this.tokenType = "html-tag";
|
|
this._condition.parseCondition = this._parseConditions.SCRIPT;
|
|
this._setExpectingAttribute();
|
|
return cursor;
|
|
}
|
|
|
|
<INITIAL> ScriptEnd => TAG
|
|
{
|
|
this.tokenType = "html-tag";
|
|
this._condition.parseCondition = this._parseConditions.INITIAL;
|
|
return cursor;
|
|
}
|
|
|
|
<INITIAL> StyleStart => TAG
|
|
{
|
|
if (this._condition.parseCondition & this._parseConditions.STYLE) {
|
|
// Do not tokenize style tag contents, keep lexer state, even though processing "<".
|
|
this.setLexCondition(this._lexConditions.INITIAL);
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
this.tokenType = "html-tag";
|
|
this._condition.parseCondition = this._parseConditions.STYLE;
|
|
this._setExpectingAttribute();
|
|
return cursor;
|
|
}
|
|
|
|
<INITIAL> StyleEnd => TAG
|
|
{
|
|
this.tokenType = "html-tag";
|
|
this._condition.parseCondition = this._parseConditions.INITIAL;
|
|
return cursor;
|
|
}
|
|
|
|
<INITIAL> LT => TAG
|
|
{
|
|
if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) {
|
|
// Do not tokenize script and style tag contents, keep lexer state, even though processing "<".
|
|
this.setLexCondition(this._lexConditions.INITIAL);
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
|
|
this._condition.parseCondition = this._parseConditions.INITIAL;
|
|
this.tokenType = "html-tag";
|
|
return cursor;
|
|
}
|
|
|
|
<TAG> GT => INITIAL
|
|
{
|
|
this.tokenType = "html-tag";
|
|
if (this._condition.parseCondition & this._parseConditions.SCRIPT) {
|
|
if (!this._internalJavaScriptTokenizer) {
|
|
this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript");
|
|
this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition;
|
|
}
|
|
// Do not tokenize script tag contents.
|
|
return cursor;
|
|
}
|
|
|
|
if (this._condition.parseCondition & this._parseConditions.STYLE) {
|
|
if (!this._internalCSSTokenizer) {
|
|
this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css");
|
|
this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition;
|
|
}
|
|
// Do not tokenize style tag contents.
|
|
return cursor;
|
|
}
|
|
|
|
this._condition.parseCondition = this._parseConditions.INITIAL;
|
|
return cursor;
|
|
}
|
|
|
|
<TAG> StringLiteral { return this._stringToken(cursor, true); }
|
|
<TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
|
|
<DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
|
|
<DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
|
|
<TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
|
|
<SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
|
|
<SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
|
|
|
|
<TAG> EqualSign => TAG
|
|
{
|
|
if (this._isExpectingAttribute())
|
|
this._setExpectingAttributeValue();
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
|
|
<TAG> Identifier
|
|
{
|
|
if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) {
|
|
// Fall through if expecting attributes.
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
|
|
if (this._condition.parseCondition === this._parseConditions.INITIAL) {
|
|
this.tokenType = "html-tag";
|
|
this._setExpectingAttribute();
|
|
var token = this._line.substring(cursorOnEnter, cursor);
|
|
if (token === "a")
|
|
this._condition.parseCondition |= this._parseConditions.A_NODE;
|
|
else if (this._condition.parseCondition & this._parseConditions.A_NODE)
|
|
this._condition.parseCondition ^= this._parseConditions.A_NODE;
|
|
} else if (this._isExpectingAttribute()) {
|
|
var token = this._line.substring(cursorOnEnter, cursor);
|
|
if (token === "href" || token === "src")
|
|
this._condition.parseCondition |= this._parseConditions.LINKIFY;
|
|
else if (this._condition.parseCondition |= this._parseConditions.LINKIFY)
|
|
this._condition.parseCondition ^= this._parseConditions.LINKIFY;
|
|
this.tokenType = "html-attribute-name";
|
|
} else if (this._isExpectingAttributeValue())
|
|
this.tokenType = this._attrValueTokenType();
|
|
else
|
|
this.tokenType = null;
|
|
return cursor;
|
|
}
|
|
<*> [^] { this.tokenType = null; return cursor; }
|
|
*/
|
|
}
|
|
}
|
|
}
|
|
|
|
// Chain the prototypes so that properties not defined on
// SourceHTMLTokenizer.prototype are looked up on WebInspector.SourceTokenizer.prototype.
WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
|