JSwikiparser

Материал из КинтВики
Перейти к: навигация, поиск


Источник: http://www.ivan.fomichev.name/2008/04/javascript-creole-10-wiki-markup-parser.html

What you see is a live demonstration of a Creole 1.0 parser written entirely in JavaScript. Creole is a wiki markup language intended to serve as a common standard across various wiki markup dialects.


Источник: http://www.ivan.fomichev.name/2008/04/javascript-creole-10-wiki-markup-parser.html



<source lang=js> /*

* Copyright (c) 2008 Ivan Fomichev
*
* Portions Copyright (c) 2007 Chris Purcell
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

// Set up the Parse.Simple namespace, reusing whatever an earlier script
// may already have attached to either level.
var Parse = Parse || {};
Parse.Simple = Parse.Simple || {};

/**
 * Parser base class.
 *
 * @param {Object} root     Description of the root grammar rule; it is wrapped
 *                          with this.ruleConstructor.
 * @param {Object} options  Default options stored on the instance.
 *
 * When invoked without any arguments the constructor does nothing, which
 * lets `new Parse.Simple.Base()` be used as a bare prototype object.
 */
Parse.Simple.Base = function(root, options) {
    if (arguments.length === 0) {
        return;
    }

    var RuleClass = this.ruleConstructor;
    this.root = new RuleClass(root);
    this.options = options;
};

/**
 * Instance members shared by every parser.
 *
 *   ruleConstructor  constructor used to wrap the root rule description
 *   root             root rule instance, set by the constructor
 *   options          default options, set by the constructor
 */
Parse.Simple.Base.prototype = {

    ruleConstructor: null,
    root: null,
    options: null,

    /**
     * Parses `data` and appends the result to `node` by delegating to the
     * root rule's apply().
     *
     * @param {Node}   node     Element that receives the generated nodes.
     * @param {String} data     Markup to parse.
     * @param {Object} options  Optional per-call options. Any key that is
     *                          undefined here is filled in from the instance
     *                          defaults (the passed object itself is
     *                          modified). When omitted, the instance
     *                          defaults are used as-is.
     */
    parse: function(node, data, options) {
        if (options) {
            // The loop variable is declared locally: an undeclared one would
            // leak into the global scope and throw in strict-mode code.
            for (var key in this.options) {
                if (typeof options[key] == 'undefined') { options[key] = this.options[key]; }
            }
        }
        else {
            options = this.options;
        }
        this.root.apply(node, data, options);
    }

};

// Replacing the whole prototype object dropped the default `constructor`
// property, so point it back at Base.
Parse.Simple.Base.prototype.constructor = Parse.Simple.Base;

/**
 * A single grammar rule.
 *
 * @param {Object} params  Rule description; every enumerable property is
 *                         copied onto the new rule. A rule that ends up
 *                         without a truthy `children` gets an empty array.
 *
 * Called without arguments, the constructor leaves the object untouched.
 */
Parse.Simple.Base.Rule = function(params) {
    if (arguments.length === 0) {
        return;
    }

    var key;
    for (key in params) {
        this[key] = params[key];
    }

    if (!this.children) {
        this.children = [];
    }
};

// Parsers built on Base wrap their root rule with this constructor.
Parse.Simple.Base.prototype.ruleConstructor = Parse.Simple.Base.Rule;

/**
 * Defaults and behaviour shared by all rules.
 *
 *   regex          pattern handed to String.prototype.match by match()
 *   capture        index of the match group whose text is parsed further
 *                  (null: nothing is parsed further)
 *   replaceRegex,
 *   replaceString  optional substitution applied to the captured text
 *   tag            name of the element created for a match (none: output
 *                  goes straight into the parent node)
 *   attrs          attributes set on the target node
 *   children       rules tried on the captured text
 *   fallback       rule (or rule description) that receives the text not
 *                  claimed by any child
 */
Parse.Simple.Base.Rule.prototype = {

    regex: null,
    capture: null,
    replaceRegex: null,
    replaceString: null,
    tag: null,
    attrs: null,
    children: null,

    /**
     * Tests this rule against `data`.
     *
     * @param {String} data     Text to search.
     * @param {Object} options  Parser options (unused here).
     * @return {Array|null} Result of data.match(this.regex).
     */
    match: function(data, options) {
        return data.match(this.regex);
    },

    /**
     * Emits the output for one match of this rule.
     *
     * @param {Node}   node     Parent node.
     * @param {Array}  r        Match result produced by match().
     * @param {Object} options  Parser options, passed on to apply().
     * @return {Object} this
     */
    build: function(node, r, options) {
        var data;
        if (this.capture !== null) {
            data = r[this.capture];
        }

        var target;
        if (this.tag) {
            target = document.createElement(this.tag);
            node.appendChild(target);
        }
        else { target = node; }

        // Empty or missing captured text is not parsed any further.
        if (data) {
            if (this.replaceRegex) {
                data = data.replace(this.replaceRegex, this.replaceString);
            }
            this.apply(target, data, options);
        }

        if (this.attrs) {
            for (var i in this.attrs) {
                target.setAttribute(i, this.attrs[i]);
                if (i == 'class') { target.className = this.attrs[i]; } // for IE
            }
        }
        return this;
    },

    /**
     * Parses `data` with the child rules and appends the output to `node`.
     * At each step the child whose match starts earliest is built; on a tie
     * the child listed first wins. Text in front of that match goes to the
     * fallback rule.
     *
     * @param {Node}   node     Node that receives the output.
     * @param {String} data     Text to parse.
     * @param {Object} options  Parser options, passed on to children.
     * @return {Object} this
     */
    apply: function(node, data, options) {
        // Coerce to a string: the loop below relies on .length, .substring()
        // and on the children calling .match() on this value.
        var tail = String(data);
        var matches = [];

        // A fallback given as a plain rule description is wrapped on first use.
        if (!this.fallback.apply) {
            this.fallback = new this.constructor(this.fallback);
        }

        while (true) {
            var best = false;
            var rule  = false;
            for (var i = 0; i < this.children.length; i++) {
                // Only (re)run a child whose cached match was invalidated.
                if (typeof matches[i] == 'undefined') {
                    if (!this.children[i].match) {
                        this.children[i] = new this.constructor(this.children[i]);
                    }
                    matches[i] = this.children[i].match(tail, options);
                }
                if (matches[i] && (!best || best.index > matches[i].index)) {
                    best = matches[i];
                    rule = this.children[i];
                    // Nothing can start earlier than position 0.
                    if (best.index == 0) { break; }
                }
            }

            var pos = best ? best.index : tail.length;
            if (pos > 0) {
                this.fallback.apply(node, tail.substring(0, pos), options);
            }

            if (!best) { break; }
            if (!rule.build) { rule = new this.constructor(rule); }
            rule.build(node, best, options);

            var chopped = best.index + best[0].length;
            tail = tail.substring(chopped);
            for (var j = 0; j < this.children.length; j++) {
                if (matches[j]) {
                    if (matches[j].index >= chopped) {
                        // Still ahead of the consumed text: shift its offset.
                        matches[j].index -= chopped;
                    }
                    else {
                        // Overlaps the consumed text: force a fresh match.
                        matches[j] = void 0;
                    }
                }
            }
        }
        return this;
    },

    // Default fallback: unclaimed text becomes a plain text node.
    fallback: {
        apply: function(node, data, options) {
            node.appendChild(document.createTextNode(data));
        }
    }

};

// Replacing the whole prototype object dropped the default `constructor`
// property; apply() depends on it to wrap plain rule descriptions.
Parse.Simple.Base.Rule.prototype.constructor = Parse.Simple.Base.Rule;

/**
 * Creole wiki-markup parser: builds the grammar below and hands its root
 * rule to Parse.Simple.Base.
 *
 * @param {Object} options  Optional settings read by the link rules:
 *     linkFormat  format applied to the target of internal links: either a
 *                 prefix, or an array [ prefix, suffix ]
 *     interwiki   map from interwiki prefix (the text before the first ':')
 *                 to a format of the same shape as linkFormat
 */
Parse.Simple.Creole = function(options) {

    // Regex source fragments shared by several rules.
    var rx = {};
    rx.link = '[^\\]|~\\n]*(?:(?:\\](?!\\])|~.)[^\\]|~\\n]*)*';
    rx.linkText = '[^\\]~\\n]*(?:(?:\\](?!\\])|~.)[^\\]~\\n]*)*';
    rx.uriPrefix = '\\b(?:(?:https?|ftp)://|mailto:)';
    rx.uri = rx.uriPrefix + rx.link;
    rx.rawUri = rx.uriPrefix + '\\S*[^\\s!"\',.:;?]';
    rx.interwikiPrefix = '[\\w.]+:';
    rx.interwikiLink = rx.interwikiPrefix + rx.link;

    // Wraps `link` in the prefix/suffix given by `format`; a format that is
    // not an array is treated as a prefix with an empty suffix.
    var formatLink = function(link, format) {
        format = format instanceof Array ? format : [ format ];
        if (typeof format[1] == 'undefined') { format[1] = ""; }
        return format[0] + link + format[1];
    };

    // The grammar: rule descriptions that are turned into Rule objects
    // lazily, the first time Rule.apply() meets them.
    var g = {
        hr: { tag: 'hr', regex: /(^|\n)\s*----\s*(\n|$)/ },
        br: { tag: 'br', regex: /\\\\/ },

        preBlock: { tag: 'pre', capture: 2,
            regex: /(^|\n)\{\{\{\n((.*\n)*?)\}\}\}(\n|$)/,
            replaceRegex: /^ ([ \t]*\}\}\})/gm,
            replaceString: '$1' },
        // The capture keeps the closing braces; the replacement strips the
        // final three.
        tt: { tag: 'tt',
            regex: /\{\{\{(.*?\}\}\}+)/, capture: 1,
            replaceRegex: /\}\}\}$/, replaceString: "" },
        ulist: { tag: 'ul', capture: 0,
            regex: /(^|\n)([ \t]*\*[^*#].*(\n|$)([ \t]*[^\s*#].*(\n|$))*([ \t]*[*#]{2}.*(\n|$))*)+/ },
        olist: { tag: 'ol', capture: 0,
            regex: /(^|\n)([ \t]*#[^*#].*(\n|$)([ \t]*[^\s*#].*(\n|$))*([ \t]*[*#]{2}.*(\n|$))*)+/ },
        li: { tag: 'li', capture: 0,
            regex: /[ \t]*([*#]).+(\n[ \t]*[^*#\s].*)*(\n[ \t]*\1[*#].+)*/,
            replaceRegex: /(^|\n)[ \t]*[*#]/g, replaceString: '$1' },
        table: { tag: 'table', capture: 0,
            regex: /(^|\n)(\|.*?[ \t]*(\n|$))+/ },
        tr: { tag: 'tr', capture: 2, regex: /(^|\n)(\|.*?)\|?[ \t]*(\n|$)/ },
        th: { tag: 'th', regex: /\|+=([^|]*)/, capture: 1 },
        td: { tag: 'td', capture: 1,
            regex: /\|+([^|~]*(~(.|(?=\n)|$)[^|~]*)*)/ },
        singleLine: { regex: /.+/, capture: 0 },
        paragraph: { tag: 'p', capture: 0,
            regex: /(^|\n)([ \t]*\S.*(\n|$))+/ },
        text: { capture: 0, regex: /(^|\n)([ \t]*[^\s].*(\n|$))+/ },
        strong: { tag: 'strong', capture: 1,
            regex: /\*\*([^*~]*((\*(?!\*)|~(.|(?=\n)|$))[^*~]*)*)(\*\*|\n|$)/ },
        em: { tag: 'em', capture: 1,
            regex: '\\/\\/(((?!' + rx.uriPrefix + ')[^\\/~])*' +
                   '((' + rx.rawUri + '|\\/(?!\\/)|~(.|(?=\\n)|$))' +
                   '((?!' + rx.uriPrefix + ')[^\\/~])*)*)(\\/\\/|\\n|$)' },
        img: { regex: '\\{\\{((?!\\{)[^|}\\n]*(?:}(?!})[^|}\\n]*)*)\\|' +
                      '([^}~\\n]*((}(?!})|~.)[^}~\\n]*)*)}}',
            build: function(node, r, options) {
                var img = document.createElement('img');
                img.src = r[1];
                img.alt = r[2].replace(/~(.)/g, '$1');
                node.appendChild(img);
            } },
        namedUri: { regex: '\\[\\[(' + rx.uri + ')\\|(' + rx.linkText + ')\\]\\]',
            build: function(node, r, options) {
                var link = document.createElement('a');
                link.href = r[1];
                if (options && options.isPlainUri) {
                    // The link text is the URI itself: emit it verbatim.
                    link.appendChild(document.createTextNode(r[2]));
                }
                else {
                    this.apply(link, r[2], options);
                }
                node.appendChild(link);
            } },
        namedLink: { regex: '\\[\\[(' + rx.link + ')\\|(' + rx.linkText + ')\\]\\]',
            build: function(node, r, options) {
                var link = document.createElement('a');

                // Drop the '~' escapes from the target before using it.
                link.href = options && options.linkFormat
                    ? formatLink(r[1].replace(/~(.)/g, '$1'), options.linkFormat)
                    : r[1].replace(/~(.)/g, '$1');
                this.apply(link, r[2], options);

                node.appendChild(link);
            } },
        // The 'dummy' builders are replaced right after this literal.
        unnamedUri: { regex: '\\[\\[(' + rx.uri + ')\\]\\]',
            build: 'dummy' },
        unnamedLink: { regex: '\\[\\[(' + rx.link + ')\\]\\]',
            build: 'dummy' },
        unnamedInterwikiLink: { regex: '\\[\\[(' + rx.interwikiLink + ')\\]\\]',
            build: 'dummy' },
        rawUri: { regex: '(' + rx.rawUri + ')',
            build: 'dummy' },
        escapedSequence: { regex: '~(' + rx.rawUri + '|.)', capture: 1,
            tag: 'span', attrs: { 'class': 'escaped' } },
        escapedSymbol: { regex: /~(.)/, capture: 1,
            tag: 'span', attrs: { 'class': 'escaped' } }
    };

    // URIs without link text reuse the named-URI builder with the URI as
    // its own text. isPlainUri is set on a copy of the options so that the
    // flag cannot leak into named URIs built later with the same object.
    g.unnamedUri.build = g.rawUri.build = function(node, r, options) {
        var plainOptions = {};
        for (var key in options) { plainOptions[key] = options[key]; }
        plainOptions.isPlainUri = true;
        g.namedUri.build.call(this, node, Array(r[0], r[1], r[1]), plainOptions);
    };
    g.unnamedLink.build = function(node, r, options) {
        g.namedLink.build.call(this, node, Array(r[0], r[1], r[1]), options);
    };
    g.namedInterwikiLink = { regex: '\\[\\[(' + rx.interwikiLink + ')\\|(' + rx.linkText + ')\\]\\]',
        build: function(node, r, options) {
            var m, f;
            if (options && options.interwiki) {
                m = r[1].match(/(.*?):(.*)/);
                // Own properties only, so that a prefix such as
                // "constructor" cannot pick up an inherited member.
                if (Object.prototype.hasOwnProperty.call(options.interwiki, m[1])) {
                    f = options.interwiki[m[1]];
                }
            }

            // Unknown prefix: handle the whole target as an internal link.
            if (typeof f == 'undefined') {
                if (!g.namedLink.apply) {
                    g.namedLink = new this.constructor(g.namedLink);
                }
                return g.namedLink.build.call(g.namedLink, node, r, options);
            }

            var link = document.createElement('a');
            link.href = formatLink(m[2].replace(/~(.)/g, '$1'), f);

            this.apply(link, r[2], options);

            node.appendChild(link);
        }
    };
    g.unnamedInterwikiLink.build = function(node, r, options) {
        g.namedInterwikiLink.build.call(this, node, Array(r[0], r[1], r[1]), options);
    };

    // Inside link text only escapes and images are recognised.
    g.namedUri.children = g.unnamedUri.children = g.rawUri.children =
            g.namedLink.children = g.unnamedLink.children =
            g.namedInterwikiLink.children = g.unnamedInterwikiLink.children =
        [ g.escapedSymbol, g.img ];

    // Headings h1..h6: the number of leading '=' selects the level.
    for (var i = 1; i <= 6; i++) {
        g['h' + i] = { tag: 'h' + i, capture: 2,
            regex: '(^|\\n)[ \\t]*={' + i + '}[ \\t]' +
                   '([^~]*?(~(.|(?=\\n)|$))*)[ \\t]*=*\\s*(\\n|$)'
        };
    }

    g.ulist.children = g.olist.children = [ g.li ];
    g.li.children = [ g.ulist, g.olist ];
    g.li.fallback = g.text;
    g.table.children = [ g.tr ];
    g.tr.children = [ g.th, g.td ];
    g.td.children = [ g.singleLine ];
    g.th.children = [ g.singleLine ];

    // Inline markup available inside headings, cells, paragraphs and the
    // inline containers themselves.
    g.h1.children = g.h2.children = g.h3.children =
            g.h4.children = g.h5.children = g.h6.children =
            g.singleLine.children = g.paragraph.children =
            g.text.children = g.strong.children = g.em.children =
        [ g.escapedSequence, g.strong, g.em, g.br, g.rawUri,
            g.namedUri, g.namedInterwikiLink, g.namedLink,
            g.unnamedUri, g.unnamedInterwikiLink, g.unnamedLink,
            g.tt, g.img ];

    // Block-level rules; text that matches none of them is parsed as
    // paragraphs.
    g.root = {
        children: [ g.h1, g.h2, g.h3, g.h4, g.h5, g.h6,
            g.hr, g.ulist, g.olist, g.preBlock, g.table ],
        fallback: { children: [ g.paragraph ] }
    };
    Parse.Simple.Base.call(this, g.root, options);

};

// Inherit from Base. The prototype object is created by calling Base with
// no arguments, so no root rule or options are set on it.
Parse.Simple.Creole.prototype = new Parse.Simple.Base();

// Point `constructor` back at Creole; without this assignment it would be
// the one inherited from the Base prototype.
Parse.Simple.Creole.prototype.constructor = Parse.Simple.Creole; </source>