JSwikiparser
Источник: http://www.ivan.fomichev.name/2008/04/javascript-creole-10-wiki-markup-parser.html
What you see is a live demonstration of a Creole 1.0 parser, written entirely in JavaScript. Creole is a wiki markup language, intended to be a common standard for various wiki markup dialects.
Источник: http://www.ivan.fomichev.name/2008/04/javascript-creole-10-wiki-markup-parser.html
/*
* Copyright (c) 2008 Ivan Fomichev
*
* Portions Copyright (c) 2007 Chris Purcell
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
// Create the Parse.Simple namespace unless an earlier script already did.
if (!Parse) { var Parse = {}; }
if (!Parse.Simple) { Parse.Simple = {}; }

/**
 * Generic rule-driven parser.
 *
 * @param {Object} root     Definition of the root rule; it is wrapped with
 *                          this.ruleConstructor.
 * @param {Object} options  Default options handed to every parse() call.
 *
 * When called without any arguments the constructor does nothing, so that
 * `new Parse.Simple.Base()` can serve as a bare prototype for subclasses.
 */
Parse.Simple.Base = function(root, options) {
    if (!arguments.length) { return; }
    this.root = new this.ruleConstructor(root);
    this.options = options;
};

Parse.Simple.Base.prototype = {
    // Constructor used to wrap the root rule definition.
    ruleConstructor: null,
    // Root rule instance, created by the constructor.
    root: null,
    // Default options supplied at construction time.
    options: null,

    /**
     * Parses `data` by applying the root rule to `node`.
     *
     * @param {*} node       Target handed to the root rule's apply().
     * @param {*} data       Text to parse.
     * @param {Object} [options]  Per-call options. Any key that is undefined
     *                       here is filled in from the parser's defaults;
     *                       the object is completed in place, so the caller
     *                       sees the merged values. When omitted, the
     *                       parser's default options object is used as is.
     */
    parse: function(node, data, options) {
        if (options) {
            // The loop variable must be declared: an undeclared name leaks a
            // global in sloppy mode and throws a ReferenceError in strict
            // mode / ES modules.
            for (var key in this.options) {
                if (typeof options[key] === 'undefined') {
                    options[key] = this.options[key];
                }
            }
        }
        else {
            options = this.options;
        }
        this.root.apply(node, data, options);
    }
};

// The prototype object was replaced above, so restore the constructor link.
Parse.Simple.Base.prototype.constructor = Parse.Simple.Base;
/**
 * A single grammar rule.
 *
 * @param {Object} params  Rule definition; every enumerable property is
 *                         copied onto the new rule (regex, capture, tag,
 *                         attrs, children, fallback, build, ...).
 *
 * When called without any arguments the constructor does nothing, so the
 * rule can be used as a bare prototype.
 */
Parse.Simple.Base.Rule = function(params) {
    if (!arguments.length) { return; }
    for (var p in params) { this[p] = params[p]; }
    if (!this.children) { this.children = []; }
};

// Make this the rule type the base parser uses to wrap its root definition.
Parse.Simple.Base.prototype.ruleConstructor = Parse.Simple.Base.Rule;

Parse.Simple.Base.Rule.prototype = {
    // Pattern (RegExp or pattern string) handed to String.prototype.match.
    regex: null,
    // Index of the capture group whose text is parsed as this rule's content.
    capture: null,
    // Optional clean-up applied to the captured text before it is parsed.
    replaceRegex: null,
    replaceString: null,
    // Name of the element created for a match; when unset, output goes
    // straight into the parent node.
    tag: null,
    // Attributes set on the created element.
    attrs: null,
    // Child rule definitions (or rule instances) tried by apply().
    children: null,

    /**
     * Looks for this rule in `data`.
     *
     * @returns {Array|null} the result of String.prototype.match.
     */
    match: function(data, options) {
        return data.match(this.regex);
    },

    /**
     * Emits output for one match of this rule.
     *
     * @param {Node}  node     Parent node receiving the output.
     * @param {Array} r        Match result produced by match().
     * @param {Object} options Options passed through to apply().
     * @returns {Object} this rule.
     */
    build: function(node, r, options) {
        var data;
        if (this.capture !== null) {
            data = r[this.capture];
        }

        var target;
        if (this.tag) {
            target = document.createElement(this.tag);
            node.appendChild(target);
        }
        else { target = node; }

        // Empty or missing captures produce no content.
        if (data) {
            if (this.replaceRegex) {
                data = data.replace(this.replaceRegex, this.replaceString);
            }
            this.apply(target, data, options);
        }

        if (this.attrs) {
            for (var name in this.attrs) {
                target.setAttribute(name, this.attrs[name]);
                if (name == 'class') { target.className = this.attrs[name]; } // for IE
            }
        }
        return this;
    },

    /**
     * Parses `data` into `node`: repeatedly picks the child rule whose match
     * starts earliest, sends the text before it to the fallback, lets the
     * rule build its output, and continues with the remaining text.
     *
     * @param {Node}  node     Node receiving the output.
     * @param {*}     data     Text to parse (coerced to a string).
     * @param {Object} options Options passed to child rules and the fallback.
     * @returns {Object} this rule.
     */
    apply: function(node, data, options) {
        var tail = '' + data;
        // Match results per child, cached between iterations. `undefined`
        // means "needs (re)matching"; `null` means "no match in the tail".
        var matches = [];
        var i;

        // A fallback given as a plain definition is wrapped on first use.
        if (!this.fallback.apply) {
            this.fallback = new this.constructor(this.fallback);
        }

        while (true) {
            var best = false;
            var rule = false;
            var bestSlot = -1;

            for (i = 0; i < this.children.length; i++) {
                if (typeof matches[i] == 'undefined') {
                    // Child definitions are wrapped lazily, in place.
                    if (!this.children[i].match) {
                        this.children[i] = new this.constructor(this.children[i]);
                    }
                    matches[i] = this.children[i].match(tail, options);
                }
                if (matches[i] && (!best || best.index > matches[i].index)) {
                    best = matches[i];
                    rule = this.children[i];
                    bestSlot = i;
                    // Nothing can start earlier than position 0.
                    if (best.index == 0) { break; }
                }
            }

            // A zero-length match consumes no input: the tail would stop
            // shrinking and the same match would be chosen forever. Treat it
            // as "no match" for this rule and pick again.
            if (best && best[0].length === 0) {
                matches[bestSlot] = null;
                continue;
            }

            var pos = best ? best.index : tail.length;
            if (pos > 0) {
                this.fallback.apply(node, tail.substring(0, pos), options);
            }
            if (!best) { break; }

            if (!rule.build) { rule = new this.constructor(rule); }
            rule.build(node, best, options);

            var chopped = best.index + best[0].length;
            tail = tail.substring(chopped);

            // Cached matches that lie entirely in the remaining text stay
            // valid after shifting their index; the others are re-matched.
            for (i = 0; i < this.children.length; i++) {
                if (matches[i]) {
                    if (matches[i].index >= chopped) {
                        matches[i].index -= chopped;
                    }
                    else {
                        matches[i] = void 0;
                    }
                }
            }
        }
        return this;
    },

    // Default fallback: text not claimed by any child becomes a text node.
    fallback: {
        apply: function(node, data, options) {
            node.appendChild(document.createTextNode(data));
        }
    }
};

// The prototype object was replaced above, so restore the constructor link
// (apply() relies on this.constructor to wrap rule definitions).
Parse.Simple.Base.Rule.prototype.constructor = Parse.Simple.Base.Rule;
/**
 * Creole wiki markup parser built on Parse.Simple.Base.
 *
 * @param {Object} [options]  Default parse options. Keys read by the grammar:
 *   - linkFormat: a prefix string, or a [prefix, suffix] array, wrapped
 *     around the target of [[wiki links]];
 *   - interwiki: map from interwiki prefix (the part before ':') to a
 *     format of the same shape, used for [[Prefix:Page]] links.
 *
 * NOTE(review): link and image targets taken from the wiki text are assigned
 * to href/src as written (a wiki link without linkFormat may therefore carry
 * a scheme such as "javascript:"). Callers rendering untrusted markup should
 * sanitize the resulting DOM or supply a linkFormat.
 */
Parse.Simple.Creole = function(options) {
    // Reusable regex fragments (as strings, so they can be concatenated).
    var rx = {};
    rx.link = '[^\\]|~\\n]*(?:(?:\\](?!\\])|~.)[^\\]|~\\n]*)*';
    rx.linkText = '[^\\]~\\n]*(?:(?:\\](?!\\])|~.)[^\\]~\\n]*)*';
    rx.uriPrefix = '\\b(?:(?:https?|ftp)://|mailto:)';
    rx.uri = rx.uriPrefix + rx.link;
    rx.rawUri = rx.uriPrefix + '\\S*[^\\s!"\',.:;?]';
    rx.interwikiPrefix = '[\\w.]+:';
    rx.interwikiLink = rx.interwikiPrefix + rx.link;

    // Wraps `link` in a format: either a prefix string or [prefix, suffix].
    // The caller's format array is left untouched.
    var formatLink = function(link, format) {
        var parts = format instanceof Array ? format : [ format ];
        var suffix = typeof parts[1] == 'undefined' ? '' : parts[1];
        return parts[0] + link + suffix;
    };

    // Returns a shallow copy of `source` (which may be null/undefined).
    var copyOptions = function(source) {
        var copy = {};
        for (var key in source) { copy[key] = source[key]; }
        return copy;
    };

    // The grammar: rule definitions keyed by name. They are plain objects
    // here and get wrapped into rule instances lazily while parsing.
    var g = {
        hr: { tag: 'hr', regex: /(^|\n)\s*----\s*(\n|$)/ },

        br: { tag: 'br', regex: /\\\\/ },

        preBlock: { tag: 'pre', capture: 2,
            regex: /(^|\n)\{\{\{\n((.*\n)*?)\}\}\}(\n|$)/,
            replaceRegex: /^ ([ \t]*\}\}\})/gm,
            replaceString: '$1' },
        tt: { tag: 'tt',
            regex: /\{\{\{(.*?\}\}\}+)/, capture: 1,
            replaceRegex: /\}\}\}$/, replaceString: '' },

        ulist: { tag: 'ul', capture: 0,
            regex: /(^|\n)([ \t]*\*[^*#].*(\n|$)([ \t]*[^\s*#].*(\n|$))*([ \t]*[*#]{2}.*(\n|$))*)+/ },
        olist: { tag: 'ol', capture: 0,
            regex: /(^|\n)([ \t]*#[^*#].*(\n|$)([ \t]*[^\s*#].*(\n|$))*([ \t]*[*#]{2}.*(\n|$))*)+/ },
        li: { tag: 'li', capture: 0,
            regex: /[ \t]*([*#]).+(\n[ \t]*[^*#\s].*)*(\n[ \t]*\1[*#].+)*/,
            replaceRegex: /(^|\n)[ \t]*[*#]/g, replaceString: '$1' },

        table: { tag: 'table', capture: 0,
            regex: /(^|\n)(\|.*?[ \t]*(\n|$))+/ },
        tr: { tag: 'tr', capture: 2, regex: /(^|\n)(\|.*?)\|?[ \t]*(\n|$)/ },
        th: { tag: 'th', regex: /\|+=([^|]*)/, capture: 1 },
        td: { tag: 'td', capture: 1,
            regex: /\|+([^|~]*(~(.|(?=\n)|$)[^|~]*)*)/ },

        singleLine: { regex: /.+/, capture: 0 },
        paragraph: { tag: 'p', capture: 0,
            regex: /(^|\n)([ \t]*\S.*(\n|$))+/ },
        text: { capture: 0, regex: /(^|\n)([ \t]*[^\s].*(\n|$))+/ },

        strong: { tag: 'strong', capture: 1,
            regex: /\*\*([^*~]*((\*(?!\*)|~(.|(?=\n)|$))[^*~]*)*)(\*\*|\n|$)/ },
        em: { tag: 'em', capture: 1,
            regex: '\\/\\/(((?!' + rx.uriPrefix + ')[^\\/~])*' +
                   '((' + rx.rawUri + '|\\/(?!\\/)|~(.|(?=\\n)|$))' +
                   '((?!' + rx.uriPrefix + ')[^\\/~])*)*)(\\/\\/|\\n|$)' },

        img: { regex: '\\{\\{((?!\\{)[^|}\\n]*(?:}(?!})[^|}\\n]*)*)\\|' +
                      '([^}~\\n]*((}(?!})|~.)[^}~\\n]*)*)}}',
            build: function(node, r, options) {
                var img = document.createElement('img');
                img.src = r[1];
                img.alt = r[2].replace(/~(.)/g, '$1');
                node.appendChild(img);
            } },

        namedUri: { regex: '\\[\\[(' + rx.uri + ')\\|(' + rx.linkText + ')\\]\\]',
            build: function(node, r, options) {
                var link = document.createElement('a');
                link.href = r[1];
                if (options && options.isPlainUri) {
                    // The link text is the URI itself: show it verbatim.
                    link.appendChild(document.createTextNode(r[2]));
                }
                else {
                    this.apply(link, r[2], options);
                }
                node.appendChild(link);
            } },

        namedLink: { regex: '\\[\\[(' + rx.link + ')\\|(' + rx.linkText + ')\\]\\]',
            build: function(node, r, options) {
                var link = document.createElement('a');

                link.href = options && options.linkFormat
                    ? formatLink(r[1].replace(/~(.)/g, '$1'), options.linkFormat)
                    : r[1].replace(/~(.)/g, '$1');
                this.apply(link, r[2], options);

                node.appendChild(link);
            } },

        // The 'dummy' builders below are placeholders, replaced right after
        // the grammar object is complete.
        unnamedUri: { regex: '\\[\\[(' + rx.uri + ')\\]\\]',
            build: 'dummy' },
        unnamedLink: { regex: '\\[\\[(' + rx.link + ')\\]\\]',
            build: 'dummy' },
        unnamedInterwikiLink: { regex: '\\[\\[(' + rx.interwikiLink + ')\\]\\]',
            build: 'dummy' },

        rawUri: { regex: '(' + rx.rawUri + ')',
            build: 'dummy' },

        escapedSequence: { regex: '~(' + rx.rawUri + '|.)', capture: 1,
            tag: 'span', attrs: { 'class': 'escaped' } },
        escapedSymbol: { regex: /~(.)/, capture: 1,
            tag: 'span', attrs: { 'class': 'escaped' } }
    };

    // Bare URIs and [[uri]] are rendered like [[uri|uri]] with verbatim text.
    // The isPlainUri flag is set on a private copy of the options: setting it
    // on the shared object would make every later [[uri|text]] link render
    // its text verbatim as well.
    g.unnamedUri.build = g.rawUri.build = function(node, r, options) {
        var plainOptions = copyOptions(options);
        plainOptions.isPlainUri = true;
        g.namedUri.build.call(this, node, [ r[0], r[1], r[1] ], plainOptions);
    };
    // [[Page]] is rendered like [[Page|Page]].
    g.unnamedLink.build = function(node, r, options) {
        g.namedLink.build.call(this, node, [ r[0], r[1], r[1] ], options);
    };

    g.namedInterwikiLink = { regex: '\\[\\[(' + rx.interwikiLink + ')\\|(' + rx.linkText + ')\\]\\]',
        build: function(node, r, options) {
            var link = document.createElement('a');

            var m, f;
            if (options && options.interwiki) {
                m = r[1].match(/(.*?):(.*)/);
                // Own-property check: a prefix such as "constructor" must not
                // resolve to a member inherited from Object.prototype.
                if (Object.prototype.hasOwnProperty.call(options.interwiki, m[1])) {
                    f = options.interwiki[m[1]];
                }
            }

            // Unknown prefix: treat the whole thing as an ordinary wiki link.
            if (typeof f == 'undefined') {
                if (!g.namedLink.apply) {
                    g.namedLink = new this.constructor(g.namedLink);
                }
                return g.namedLink.build.call(g.namedLink, node, r, options);
            }

            link.href = formatLink(m[2].replace(/~(.)/g, '$1'), f);

            this.apply(link, r[2], options);

            node.appendChild(link);
        }
    };
    // [[Prefix:Page]] is rendered like [[Prefix:Page|Prefix:Page]].
    g.unnamedInterwikiLink.build = function(node, r, options) {
        g.namedInterwikiLink.build.call(this, node, [ r[0], r[1], r[1] ], options);
    };

    // Inside link text only escapes and images are recognised.
    g.namedUri.children = g.unnamedUri.children = g.rawUri.children =
            g.namedLink.children = g.unnamedLink.children =
            g.namedInterwikiLink.children = g.unnamedInterwikiLink.children =
        [ g.escapedSymbol, g.img ];

    // Headings h1..h6: a line starting with that many '=' characters.
    for (var i = 1; i <= 6; i++) {
        g['h' + i] = { tag: 'h' + i, capture: 2,
            regex: '(^|\\n)[ \\t]*={' + i + '}[ \\t]' +
                   '([^~]*?(~(.|(?=\\n)|$))*)[ \\t]*=*\\s*(\\n|$)'
        };
    }

    g.ulist.children = g.olist.children = [ g.li ];
    g.li.children = [ g.ulist, g.olist ];
    g.li.fallback = g.text;

    g.table.children = [ g.tr ];
    g.tr.children = [ g.th, g.td ];
    g.td.children = [ g.singleLine ];
    g.th.children = [ g.singleLine ];

    // Inline markup shared by every text-bearing rule.
    g.h1.children = g.h2.children = g.h3.children =
            g.h4.children = g.h5.children = g.h6.children =
            g.singleLine.children = g.paragraph.children =
            g.text.children = g.strong.children = g.em.children =
        [ g.escapedSequence, g.strong, g.em, g.br, g.rawUri,
            g.namedUri, g.namedInterwikiLink, g.namedLink,
            g.unnamedUri, g.unnamedInterwikiLink, g.unnamedLink,
            g.tt, g.img ];

    // Block-level rules; anything they do not claim is parsed as paragraphs.
    g.root = {
        children: [ g.h1, g.h2, g.h3, g.h4, g.h5, g.h6,
            g.hr, g.ulist, g.olist, g.preBlock, g.table ],
        fallback: { children: [ g.paragraph ] }
    };

    Parse.Simple.Base.call(this, g.root, options);
};

// Inherit parse() from the base parser. The base constructor returns early
// when it is called without arguments.
Parse.Simple.Creole.prototype = new Parse.Simple.Base();
Parse.Simple.Creole.prototype.constructor = Parse.Simple.Creole;