markdown.js (43359B)
// Released under MIT license
// Copyright (c) 2009-2010 Dominic Baggott
// Copyright (c) 2009-2010 Ash Berlin
// Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com)

/**
 *  class Markdown
 *
 *  Markdown processing in Javascript done right. We have very particular views
 *  on what constitutes 'right' which include:
 *
 *  - produces well-formed HTML (this means that em and strong nesting is
 *    important)
 *
 *  - has an intermediate representation to allow processing of parsed data (We
 *    in fact have two, both as [JsonML]: a markdown tree and an HTML tree).
 *
 *  - is easily extensible to add new dialects without having to rewrite the
 *    entire parsing mechanics
 *
 *  - has a good test suite
 *
 *  This implementation fulfills all of these (except that the test suite could
 *  do with expanding to automatically run all the fixtures from other Markdown
 *  implementations.)
 *
 *  ##### Intermediate Representation
 *
 *  *TODO* Talk about this :) Its JsonML, but document the node names we use.
 *
 *  [JsonML]: http://jsonml.org/ "JSON Markup Language"
 *
 *  new Markdown( [dialect] )
 *  - dialect (String | Object): a key of `Markdown.dialects`, or a dialect
 *    object to use as-is. `undefined` and `null` select the Gruber dialect.
 *
 *  Throws an `Error` when `dialect` is a primitive that does not name one of
 *  the registered dialects.
 **/
var Markdown = expose.Markdown = function Markdown(dialect) {
  var dialects = Markdown.dialects;

  if ( dialect === undefined || dialect === null ) {
    // typeof null is "object"; treat it like "not given" rather than storing
    // a dialect that would crash on first use.
    this.dialect = dialects.Gruber;
  }
  else if ( typeof dialect === "object" ) {
    this.dialect = dialect;
  }
  else if ( Object.prototype.hasOwnProperty.call( dialects, dialect ) ) {
    // Own-property check: `in` would also accept inherited names such as
    // "toString" or "constructor".
    this.dialect = dialects[dialect];
  }
  else {
    throw new Error("Unknown Markdown dialect '" + String(dialect) + "'");
  }

  // Per-instance inline parsing state (stacks of currently open em/strong
  // delimiters) and the prefix used by debug().
  this.em_state = [];
  this.strong_state = [];
  this.debug_indent = "";
};
/**
 *  parse( markdown, [dialect] ) -> JsonML
 *  - markdown (String): markdown string to parse
 *  - dialect (String | Dialect): the dialect to use, defaults to gruber
 *
 *  Parse `markdown` and return a markdown document as a Markdown.JsonML tree.
 **/
expose.parse = function( source, dialect ) {
  // dialect will default if undefined
  var md = new Markdown( dialect );
  return md.toTree( source );
};

/**
 *  toHTML( markdown, [dialect] ) -> String
 *  toHTML( md_tree ) -> String
 *  - markdown (String): markdown string to parse
 *  - md_tree (Markdown.JsonML): parsed markdown tree
 *
 *  Take markdown (either as a string or as a JsonML tree) and run it through
 *  [[toHTMLTree]] then turn it into a well-formatted HTML fragment. `options`
 *  is forwarded untouched to the tree conversion.
 **/
expose.toHTML = function toHTML( source , dialect , options ) {
  var input = expose.toHTMLTree( source , dialect , options );

  return expose.renderJsonML( input );
};

/**
 *  toHTMLTree( markdown, [dialect] ) -> JsonML
 *  toHTMLTree( md_tree ) -> JsonML
 *  - markdown (String): markdown string to parse
 *  - dialect (String | Dialect): the dialect to use, defaults to gruber
 *  - md_tree (Markdown.JsonML): parsed markdown tree
 *
 *  Turn markdown into HTML, represented as a JsonML tree. If a string is given
 *  to this function, it is first parsed into a markdown tree by calling
 *  [[parse]].
 **/
expose.toHTMLTree = function toHTMLTree( input, dialect , options ) {
  // convert string input to an MD tree. Go through `expose` rather than
  // `this` so the function also works when called detached from the module.
  if ( typeof input === "string" ) input = expose.parse( input, dialect );

  // Now convert the MD tree to an HTML tree

  // pull the link/image references (if any) out of the root attributes
  var attrs = extract_attr( input ),
      refs = {};

  if ( attrs && attrs.references ) {
    refs = attrs.references;
  }

  var html = convert_tree_to_html( input, refs , options );
  merge_text_nodes( html );
  return html;
};
/**
 *  Markdown.mk_block( block, [trail], [line] ) -> String
 *  - block (String): text of the block
 *  - trail (String): whitespace that followed the block in the source.
 *    Defaults to "\n\n" only when called with a single argument.
 *  - line (Number): 1-based source line the block started on
 *
 *  Wrap `block` in a String *object* so that the extra `trailing` and
 *  `lineNumber` properties can ride along with the text.
 **/
var mk_block = Markdown.mk_block = function(block, trail, line) {
  // Be helpful for default case in tests.
  if ( arguments.length == 1 ) trail = "\n\n";

  // Deliberately a String object: primitives cannot carry properties.
  var s = new String(block);
  s.trailing = trail;
  // To make it clear its not just a string
  s.toSource = function() {
    // uneval() only exists in some SpiderMonkey builds; fall back to JSON so
    // calling toSource() elsewhere does not throw a ReferenceError.
    function src( value ) {
      if ( typeof uneval === "function" ) return uneval( value );
      return value === undefined ? "(void 0)" : JSON.stringify( value );
    }

    return "Markdown.mk_block( " +
            src(block) +
            ", " +
            src(trail) +
            ", " +
            src(line) +
            " )";
  };

  if (line != undefined)
    s.lineNumber = line;

  return s;
};

// Count the "\n" characters in `str`.
function count_lines( str ) {
  var n = 0, i = -1;
  while ( ( i = str.indexOf('\n', i+1) ) != -1) n++;
  return n;
}

/**
 *  Markdown#split_blocks( input, [startLine] ) -> [ String, ... ]
 *  - input (String): markdown source
 *  - startLine: accepted for interface compatibility; currently ignored,
 *    numbering always starts at line 1
 *
 *  Internal - split source into rough blocks separated by blank lines. Each
 *  element is made with mk_block and so carries `trailing` and `lineNumber`.
 **/
Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) {
  // Normalise CRLF / lone CR so "\r" never leaks into block text.
  input = String( input ).replace( /\r\n|\r/g, "\n" );

  // [\s\S] matches _anything_ (newline or space)
  var re = /([\s\S]+?)($|\n(?:\s*\n|$)+)/g,
      blocks = [],
      m;

  var line_no = 1;

  if ( ( m = /^(\s*\n)/.exec(input) ) != null ) {
    // skip (but count) leading blank lines
    line_no += count_lines( m[0] );
    re.lastIndex = m[0].length;
  }

  while ( ( m = re.exec(input) ) != null ) {
    blocks.push( mk_block( m[1], m[2], line_no ) );
    line_no += count_lines( m[0] );
  }

  return blocks;
};
/**
 *  Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ]
 *  - block (String): the block to process
 *  - next (Array): the following blocks
 *
 *  Process `block` and return an array of JsonML nodes representing `block`.
 *
 *  It does this by asking each block level function in the dialect to process
 *  the block until one can. Successful handling is indicated by returning an
 *  array (with zero or more JsonML nodes), failure by a false value.
 *
 *  Blocks handlers are responsible for calling [[Markdown#processInline]]
 *  themselves as appropriate.
 *
 *  If the blocks were split incorrectly or adjacent blocks need collapsing you
 *  can adjust `next` in place using shift/splice etc.
 *
 *  If any of this default behaviour is not right for the dialect, you can
 *  define a `__call__` method on the dialect that will get invoked to handle
 *  the block processing.
 */
Markdown.prototype.processBlock = function processBlock( block, next ) {
  var cbs = this.dialect.block,
      ord = cbs.__order__;

  // A dialect-level override takes over the whole dispatch.
  if ( "__call__" in cbs ) {
    return cbs.__call__.call(this, block, next);
  }

  for ( var i = 0; i < ord.length; i++ ) {
    //D:this.debug( "Testing", ord[i] );
    var res = cbs[ ord[i] ].call( this, block, next );
    if ( res ) {
      //D:this.debug("  matched");
      // A handler result should be an array of JsonML nodes (arrays); warn,
      // but still return it, when it is not.
      if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) )
        this.debug(ord[i], "didn't return a proper array");
      //D:this.debug( "" );
      return res;
    }
  }

  // Uhoh! no match! Should we throw an error?
  return [];
};

/**
 *  Markdown#processInline( block ) -> [ String | JsonML, ... ]
 *  - block (String): text to run the dialect's inline rules over
 *
 *  Coerces `block` to a primitive string and hands it to the dialect's inline
 *  `__call__` with this Markdown instance as `this`.
 **/
Markdown.prototype.processInline = function processInline( block ) {
  return this.dialect.inline.__call__.call( this, String( block ) );
};
/**
 *  Markdown#toTree( source, [custom_root] ) -> JsonML
 *  - source (String | Array): markdown source to parse, or an array of
 *    already-split blocks (which is consumed in place)
 *  - custom_root (JsonML): when given, parse into this node instead of the
 *    document tree; `this.tree` is restored to its old value on return
 *
 *  Parse `source` into a JsonML tree representing the markdown document.
 **/
Markdown.prototype.toTree = function toTree( source, custom_root ) {
  var blocks = source instanceof Array
             ? source
             : this.split_blocks( source );

  // Make tree a member variable so its easier to mess with in extensions
  var old_tree = this.tree;
  try {
    this.tree = custom_root || this.tree || [ "markdown" ];

    while ( blocks.length ) {
      var b = this.processBlock( blocks.shift(), blocks );

      // Reference blocks and the like won't return any content
      if ( !b.length ) continue;

      this.tree.push.apply( this.tree, b );
    }
    return this.tree;
  }
  finally {
    // Only nested parses swap the tree back; a top-level parse leaves the
    // document tree on the instance.
    if ( custom_root )
      this.tree = old_tree;
  }
};

/**
 *  Markdown#debug( ... ) -> undefined
 *
 *  Print the arguments, prefixed with `this.debug_indent`, through whichever
 *  of the global `print` function and `console.log` exist.
 **/
Markdown.prototype.debug = function () {
  var args = Array.prototype.slice.call( arguments);
  args.unshift(this.debug_indent);
  if (typeof print !== "undefined")
    print.apply( print, args );
  if (typeof console !== "undefined" && typeof console.log !== "undefined")
    // console.log must be invoked with console as its receiver; some hosts
    // throw "Illegal invocation" otherwise.
    console.log.apply( console, args );
};

/**
 *  Markdown#loop_re_over_block( re, block, cb ) -> String
 *  - re (RegExp): pattern expected to match at the start of the text. Dont
 *    use /g regexps with this
 *  - block (String): text to consume
 *  - cb (Function): called with each match array, `this` being the instance
 *
 *  Repeatedly match `re`, chopping `m[0].length` characters off the front
 *  after every match. Returns whatever text is left over.
 **/
Markdown.prototype.loop_re_over_block = function( re, block, cb ) {
  var m,
      b = block.valueOf();

  while ( b.length && (m = re.exec(b) ) != null) {
    b = b.substr( m[0].length );
    cb.call(this, m);
    // A zero-length match consumes nothing and would spin forever.
    if ( m[0].length === 0 ) break;
  }
  return b;
};

/**
 * Markdown.dialects
 *
 * Namespace of built-in dialects.
 **/
Markdown.dialects = {};
/**
 * Markdown.dialects.Gruber
 *
 * The default dialect that follows the rules set out by John Gruber's
 * markdown.pl as closely as possible. Well actually we follow the behaviour of
 * that script which in some places is not exactly what the syntax web page
 * says.
 *
 * Every block handler is called with the Markdown instance as `this` and
 * receives ( block, next ): the current block and the array of following
 * blocks, which it may modify in place. A handler returns `undefined` when it
 * does not apply, otherwise an array of JsonML nodes (possibly empty).
 **/
Markdown.dialects.Gruber = {
  block: {
    // "# Title" style headers, 1-6 hashes, optional closing hashes.
    atxHeader: function atxHeader( block, next ) {
      var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ );

      if ( !m ) return undefined;

      var header = [ "header", { level: m[ 1 ].length } ];
      Array.prototype.push.apply(header, this.processInline(m[ 2 ]));

      // Anything after the header line goes back on the queue as its own block
      if ( m[0].length < block.length )
        next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );

      return [ header ];
    },

    // "Title\n=====" (level 1) or "Title\n-----" (level 2) headers. The
    // title text is used verbatim (no inline processing).
    setextHeader: function setextHeader( block, next ) {
      var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ );

      if ( !m ) return undefined;

      var level = ( m[ 2 ] === "=" ) ? 1 : 2;
      var header = [ "header", { level : level }, m[ 1 ] ];

      if ( m[0].length < block.length )
        next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) );

      return [ header ];
    },

    // Indented (4 spaces or a tab) code blocks.
    code: function code( block, next ) {
      // |    Foo
      // |bar
      // should be a code block followed by a paragraph. Fun
      //
      // There might also be adjacent code block to merge.

      var ret = [],
          re = /^(?: {0,3}\t| {4})(.*)\n?/;

      // 4 spaces + content
      if ( !block.match( re ) ) return undefined;

      function collect( m ) { ret.push( m[1] ); }

      while ( true ) {
        // Now pull out the rest of the lines
        var b = this.loop_re_over_block( re, block.valueOf(), collect );

        if ( b.length ) {
          // Case alluded to in first comment. push it back on as a new block
          next.unshift( mk_block( b, block.trailing ) );
          break;
        }

        // Check the next block - it might be code too
        if ( !next.length || !next[0].match( re ) ) break;

        // Pull how how many blanks lines follow - minus two to account for .join
        ret.push( block.trailing.replace(/[^\n]/g, '').substring(2) );

        block = next.shift();
      }

      return [ [ "code_block", ret.join("\n") ] ];
    },

    // Horizontal rules: three or more of - _ * on a line of their own.
    horizRule: function horizRule( block, next ) {
      // this needs to find any hr in the block to handle abutting blocks
      var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ );

      if ( !m ) {
        return undefined;
      }

      var jsonml = [ [ "hr" ] ];

      // if there's a leading abutting block, process it
      if ( m[ 1 ] ) {
        jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) );
      }

      // if there's a trailing abutting block, stick it into next
      if ( m[ 3 ] ) {
        next.unshift( mk_block( m[ 3 ] ) );
      }

      return jsonml;
    },

    // There are two types of lists. Tight and loose. Tight lists have no whitespace
    // between the items (and result in text just in the <li>) and loose lists,
    // which have an empty line between list items, resulting in (one or more)
    // paragraphs inside the <li>.
    //
    // There are all sorts weird edge cases about the original markdown.pl's
    // handling of lists:
    //
    // * Nested lists are supposed to be indented by four chars per level. But
    //   if they aren't, you can get a nested list by indenting by less than
    //   four so long as the indent doesn't match an indent of an existing list
    //   item in the 'nest stack'.
    //
    // * The type of the list (bullet or number) is controlled just by the
    //   first item at the indent. Subsequent changes are ignored unless they
    //   are for nested lists
    //
    lists: (function( ) {
      // Use a closure to hide a few variables.
      // Numbered markers may have any number of digits ("10.", "123.").
      var any_list = "[*+-]|\\d+\\.",
          bullet_list = /[*+-]/,
          // Capture leading indent as it matters for determining nested lists.
          is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ),
          indent_re = "(?: {0,3}\\t| {4})";

      // TODO: Cache this regexp for certain depths.
      // Create a regexp suitable for matching an li for a given stack depth
      function regex_for_depth( depth ) {

        return new RegExp(
          // m[1] = indent, m[2] = list_type
          "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" +
          // m[3] = cont
          "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})"
        );
      }

      // A tab (after up to three spaces) counts as one four-column indent
      // step; the depth arithmetic in the matcher divides the indent by 4.
      function expand_tab( input ) {
        return input.replace( / {0,3}\t/g, "    " );
      }

      // Add inline content `inline` to `li`. inline comes from processInline
      // so is an array of content
      function add(li, loose, inline, nl) {
        if (loose) {
          li.push( [ "para" ].concat(inline) );
          return;
        }
        // Hmmm, should this be any block level element or just paras?
        var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para"
                   ? li[li.length -1]
                   : li;

        // If there is already some content in this list, add the new line in
        if (nl && li.length > 1) inline.unshift(nl);

        for (var i=0; i < inline.length; i++) {
          var what = inline[i],
              is_str = typeof what == "string";
          if (is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" ) {
            // merge adjacent text so the tree has no split string nodes
            add_to[ add_to.length-1 ] += what;
          }
          else {
            add_to.push( what );
          }
        }
      }

      // contained means have an indent greater than the current one. On
      // *every* line in the block
      function get_contained_blocks( depth, blocks ) {

        var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ),
            replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"),
            ret = [];

        // NOTE(review): only the first following block is ever examined (the
        // original loop breaks unconditionally after one pass). Kept as-is;
        // confirm whether consecutive contained blocks should be collected.
        if ( blocks.length > 0 && re.exec( blocks[0] ) ) {
          var b = blocks.shift(),
              // Now remove that indent
              x = b.replace( replace, "");

          ret.push( mk_block( x, b.trailing, b.lineNumber ) );
        }
        return ret;
      }

      // passed to stack.forEach to turn list items up the stack into paras
      function paragraphify(s, i, stack) {
        var list = s.list;
        var last_li = list[list.length-1];

        if (last_li[1] instanceof Array && last_li[1][0] == "para") {
          return;
        }
        if (i+1 == stack.length) {
          // Last stack frame
          // Keep the same array, but replace the contents
          last_li.push( ["para"].concat( last_li.splice(1) ) );
        }
        else {
          // Not the innermost list: the last child is the nested sublist,
          // which must stay outside the new paragraph.
          var sublist = last_li.pop();
          last_li.push( ["para"].concat( last_li.splice(1) ), sublist );
        }
      }

      // The matcher function
      return function( block, next ) {
        var m = block.match( is_list_re );
        if ( !m ) return undefined;

        function make_list( m ) {
          var list = bullet_list.exec( m[2] )
                   ? ["bulletlist"]
                   : ["numberlist"];

          stack.push( { list: list, indent: m[1] } );
          return list;
        }


        var stack = [], // Stack of lists for nesting.
            list = make_list( m ),
            last_li,
            loose = false,
            ret = [ stack[0].list ];

        // Loop to search over block looking for inner block elements and loose lists
        loose_search:
        while( true ) {
          // Split into lines preserving new lines at end of line
          var lines = block.split( /(?=\n)/ );

          // We have to grab all lines for a li and call processInline on them
          // once as there are some inline things that can span lines.
          var li_accumulate = "";

          // Loop over the lines in this block looking for tight lists.
          for (var line_no=0; line_no < lines.length; line_no++) {
            var nl = "",
                l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; });

            // TODO: really should cache this
            var line_re = regex_for_depth( stack.length );

            // The second alternative of line_re can match the empty string,
            // so m is never null here.
            m = l.match( line_re );

            // We have a list item
            if ( m[1] !== undefined ) {
              // Process the previous list item, if any
              if ( li_accumulate.length ) {
                add( last_li, loose, this.processInline( li_accumulate ), nl );
                // Loose mode will have been dealt with. Reset it
                loose = false;
                li_accumulate = "";
              }

              m[1] = expand_tab( m[1] );
              var wanted_depth = Math.floor(m[1].length/4)+1;
              if ( wanted_depth > stack.length ) {
                // Deep enough for a nested list outright
                list = make_list( m );
                last_li.push( list );
                last_li = list[1] = [ "listitem" ];
              }
              else {
                // We aren't deep enough to be strictly a new level. This is
                // where Md.pl goes nuts. If the indent matches a level in the
                // stack, put it there, else put it one deeper then the
                // wanted_depth deserves.
                var found = stack.some(function(s, i) {
                  if ( s.indent != m[1] ) return false;
                  list = s.list;     // Found the level we want
                  stack.splice(i+1); // Remove the others
                  return true;       // And stop looping
                });

                if (!found) {
                  wanted_depth++;
                  if (wanted_depth <= stack.length) {
                    stack.splice(wanted_depth);
                    list = stack[wanted_depth-1].list;
                  }
                  else {
                    // made new stack for messy indent
                    list = make_list(m);
                    last_li.push(list);
                  }
                }

                last_li = [ "listitem" ];
                list.push(last_li);
              } // end depth of shenegains
              nl = "";
            }

            // Add content
            if (l.length > m[0].length) {
              li_accumulate += nl + l.substr( m[0].length );
            }
          } // tight_search

          if ( li_accumulate.length ) {
            add( last_li, loose, this.processInline( li_accumulate ), nl );
            // Loose mode will have been dealt with. Reset it
            loose = false;
            li_accumulate = "";
          }

          // Look at the next block - we might have a loose list. Or an extra
          // paragraph for the current li
          var contained = get_contained_blocks( stack.length, next );

          // Deal with code blocks or properly nested lists
          if (contained.length > 0) {
            // Make sure all listitems up the stack are paragraphs
            stack.forEach( paragraphify, this );

            last_li.push.apply( last_li, this.toTree( contained, [] ) );
          }

          var next_block = next[0] && next[0].valueOf() || "";

          if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) {
            block = next.shift();

            // Check for an HR following a list: features/lists/hr_abutting.
            // Call with the Markdown instance as `this`: horizRule uses
            // this.processBlock when text precedes the rule.
            var hr = this.dialect.block.horizRule.call( this, block, next );

            if (hr) {
              ret.push.apply(ret, hr);
              break;
            }

            // Make sure all listitems up the stack are paragraphs
            stack.forEach( paragraphify , this );

            loose = true;
            continue loose_search;
          }
          break;
        } // loose_search

        return ret;
      };
    })(),

    // "> quoted" blocks, including lazy continuation across following blocks.
    blockquote: function blockquote( block, next ) {
      if ( !block.match( /^>/m ) )
        return undefined;

      var jsonml = [];

      // Work on a primitive copy plus an explicit `trailing`: appending to a
      // mk_block String object yields a primitive, which silently drops any
      // property written to it (and reads back `undefined`).
      var text = block.valueOf(),
          trailing = typeof block.trailing === "string" ? block.trailing : "\n\n";

      // separate out the leading abutting block, if any
      if ( text[ 0 ] != ">" ) {
        var lines = text.split( /\n/ ),
            prev = [];

        // keep shifting lines until you find a crotchet
        while ( lines.length && lines[ 0 ][ 0 ] != ">" ) {
          prev.push( lines.shift() );
        }

        // reassemble!
        text = lines.join( "\n" );
        jsonml.push.apply( jsonml, this.processBlock( prev.join( "\n" ), [] ) );
      }

      // if the next block is also a blockquote merge it in
      while ( next.length && next[ 0 ][ 0 ] == ">" ) {
        var b = next.shift();
        text += trailing + b;
        trailing = b.trailing;
      }

      // Strip off the leading "> " and re-process as a block.
      var input = text.replace( /^> ?/gm, '' );
      jsonml.push( this.toTree( input, [ "blockquote" ] ) );

      return jsonml;
    },

    // "[id]: url "title"" definitions. Produces no output; the references are
    // stored on the root node's attributes under `references`.
    referenceDefn: function referenceDefn( block, next) {
      var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/;
      // interesting matches are [ , ref_id, url, , title, title ]

      if ( !block.match(re) )
        return undefined;

      // make an attribute node if it doesn't exist
      if ( !extract_attr( this.tree ) ) {
        this.tree.splice( 1, 0, {} );
      }

      var attrs = extract_attr( this.tree );

      // make a references hash if it doesn't exist
      if ( attrs.references === undefined ) {
        attrs.references = {};
      }

      var b = this.loop_re_over_block(re, block, function( m ) {

        // strip the optional <...> around the url
        if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
          m[2] = m[2].substring( 1, m[2].length - 1 );

        var ref = attrs.references[ m[1].toLowerCase() ] = {
          href: m[2]
        };

        if (m[4] !== undefined)
          ref.title = m[4];
        else if (m[5] !== undefined)
          ref.title = m[5];

      } );

      // whatever is not a definition becomes a block of its own
      if (b.length)
        next.unshift( mk_block( b, block.trailing ) );

      return [];
    },

    para: function para( block, next ) {
      // everything's a para!
      return [ ["para"].concat( this.processInline( block ) ) ];
    }
  }
};
/**
 * Markdown.dialects.Gruber.inline
 *
 * Inline (span level) rules. Each key is the literal text that triggers the
 * handler. A handler is called with the Markdown instance as `this` and
 * ( text, match, out ): the remaining text starting at the trigger, the regexp
 * match from the scanner, and the output collected so far. It returns
 * [ number of characters consumed, node/children to add... ].
 **/
Markdown.dialects.Gruber.inline = {
    // Scanner: walk `text`, dispatching to the handler for every occurrence
    // of `patterns` (a RegExp or a regexp source string).
    __call__: function inline( text, patterns ) {
      // Hmmm - should this function be directly in Md#processInline, or
      // conversely, should Md#processBlock be moved into block.__call__ too
      var out = [ ],
          m,
          // Look for the next occurrence of a special character/pattern
          re = new RegExp( "([\\s\\S]*?)(" + (patterns.source || patterns) + ")", "g" ),
          lastIndex = 0;

      //D:var self = this;
      //D:self.debug("processInline:", uneval(text) );
      function add(x) {
        //D:self.debug("  adding output", uneval(x));
        if (typeof x == "string" && typeof out[out.length-1] == "string")
          out[ out.length-1 ] += x;
        else
          out.push(x);
      }

      while ( ( m = re.exec(text) ) != null) {
        if ( m[1] ) add( m[1] ); // Some un-interesting text matched
        else m[1] = { length: 0 }; // Or there was none, but make m[1].length == 0

        // Reset on every pass: `var` is function scoped, so without this a
        // previous iteration's (already shifted) result could be reused.
        var res = undefined;
        if ( m[2] in this.dialect.inline ) {
          res = this.dialect.inline[ m[2] ].call(
                    this,
                    text.substr( m.index + m[1].length ), m, out );
        }
        // Default for now to make dev easier. just slurp special and output it.
        res = res || [ m[2].length, m[2] ];

        var len = res.shift();
        // Update how much input was consumed
        re.lastIndex += ( len - m[2].length );

        // Add children
        res.forEach(add);

        lastIndex = re.lastIndex;
      }

      // Add last 'boring' chunk
      if ( text.length > lastIndex )
        add( text.substr( lastIndex ) );

      return out;
    },

    "\\": function escaped( text ) {
      // [ length of input processed, node/children to add... ]
      // Only escape: \ ` * _ { } [ ] ( ) # * + - . !
      if ( text.match( /^\\[\\`\*_{}\[\]()#\+.!\-]/ ) )
        return [ 2, text[1] ];
      else
        // Not an escape
        return [ 1, "\\" ];
    },

    "![": function image( text ) {
      // ![Alt text](/path/to/img.jpg "Optional title")
      //      1          2            3       4         <--- captures
      var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*(\S*)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );

      if ( m ) {
        if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
          m[2] = m[2].substring( 1, m[2].length - 1 );

        // Process escapes only (assignment, so the unescaped href is kept)
        m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];

        var attrs = { alt: m[1], href: m[2] || "" };
        if ( m[4] !== undefined)
          attrs.title = m[4];

        return [ m[0].length, [ "img", attrs ] ];
      }

      // ![Alt text][id]
      m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ );

      if ( m ) {
        // We can't check if the reference is known here as it likely wont be
        // found till after. Check it in md tree->html tree conversion
        return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), text: m[0] } ] ];
      }

      // Just consume the '!['
      return [ 2, "![" ];
    },

    "[": function link( text ) {
      // [link text](/path/to/img.jpg "Optional title")
      //      1          2            3       4         <--- captures
      var m = text.match( /^\[([\s\S]*?)\][ \t]*\([ \t]*(\S+)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ );

      if ( m ) {
        if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' )
          m[2] = m[2].substring( 1, m[2].length - 1 );

        // Process escapes only
        m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0];

        var attrs = { href: m[2] || "" };
        if ( m[4] !== undefined)
          attrs.title = m[4];

        var link = [ "link", attrs ];
        Array.prototype.push.apply( link, this.processInline( m[1] ) );
        return [ m[0].length, link ];
      }

      // [Alt text][id]
      // [Alt text] [id]
      // [id]
      m = text.match( /^\[([\s\S]*?)\](?: ?\[(.*?)\])?/ );

      if ( m ) {
        // [id] case, text == id
        if ( m[2] === undefined || m[2] === "" ) m[2] = m[1];

        var ref_attrs = { ref: m[ 2 ].toLowerCase(), original: m[ 0 ] };
        var ref_link = [ "link_ref", ref_attrs ];
        Array.prototype.push.apply( ref_link, this.processInline( m[1] ) );

        // We can't check if the reference is known here as it likely wont be
        // found till after. Check it in md tree->html tree conversion.
        // Store the original so that conversion can revert if the ref isn't found.
        return [
          m[ 0 ].length,
          ref_link
        ];
      }

      // Just consume the '['
      return [ 1, "[" ];
    },


    "<": function autoLink( text ) {
      var m;

      if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) {
        if ( m[3] ) {
          // bare e-mail address
          return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ];

        }
        else if ( m[2] == "mailto" ) {
          // show the address without the scheme
          return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ];
        }
        else
          return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ];
      }

      return [ 1, "<" ];
    },

    "`": function inlineCode( text ) {
      // Inline code block. as many backticks as you like to start it
      // Always skip over the opening ticks.
      var m = text.match( /(`+)(([\s\S]*?)\1)/ );

      if ( m && m[2] )
        return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ];
      else {
        // TODO: No matching end code found - warn!
        return [ 1, "`" ];
      }
    },

    // Two spaces before a newline is a hard line break; all three characters
    // are consumed.
    "  \n": function lineBreak( text ) {
      return [ 3, [ "linebreak" ] ];
    }

};

// Meta Helper/generator method for em and strong handling.
//  - tag: "em" or "strong", the JsonML node to produce
//  - md:  the delimiter text ("*", "_", "**" or "__")
// Returns an inline handler. Open delimiters are tracked on the Markdown
// instance in `em_state` / `strong_state`.
function strong_em( tag, md ) {

  var state_slot = tag + "_state",
      other_slot = tag == "strong" ? "em_state" : "strong_state";

  // Marker pushed as the last output element by the closing delimiter so the
  // opening call (further up the recursion) can tell it was matched.
  function CloseTag(len) {
    this.len_after = len;
    this.name = "close_" + md;
  }

  return function ( text, orig_match ) {

    if (this[state_slot][0] == md) {
      // Most recent em is of this type
      //D:this.debug("closing", md);
      this[state_slot].shift();

      // "Consume" everything to go back to the recursion in the else-block below
      return[ text.length, new CloseTag(text.length-md.length) ];
    }
    else {
      // Store a clone of the em/strong states
      var other = this[other_slot].slice(),
          state = this[state_slot].slice();

      this[state_slot].unshift(md);

      //D:this.debug_indent += "  ";

      // Recurse
      var res = this.processInline( text.substr( md.length ) );
      //D:this.debug_indent = this.debug_indent.substr(2);

      var last = res[res.length - 1];

      //D:this.debug("processInline from", tag + ": ", uneval( res ) );

      // Drop the front entry of this tag's state stack again.
      this[state_slot].shift();
      if (last instanceof CloseTag) {
        res.pop();
        // We matched! Huzzah.
        var consumed = text.length - last.len_after;
        return [ consumed, [ tag ].concat(res) ];
      }
      else {
        // Restore the state of the other kind. We might have mistakenly closed it.
        this[other_slot] = other;
        this[state_slot] = state;

        // We can't reuse the processed result as it could have wrong parsing contexts in it.
        return [ md.length, md ];
      }
    }
  }; // End returned function
}

Markdown.dialects.Gruber.inline["**"] = strong_em("strong", "**");
Markdown.dialects.Gruber.inline["__"] = strong_em("strong", "__");
Markdown.dialects.Gruber.inline["*"]  = strong_em("em", "*");
Markdown.dialects.Gruber.inline["_"]  = strong_em("em", "_");
// Build default order from insertion order.
// Stores the enumerable keys of the block table `d` (own first, then
// inherited, skipping the two bookkeeping keys) as `d.__order__`.
Markdown.buildBlockOrder = function(d) {
  var ord = [];
  // Inherited keys are wanted here: a subclassed dialect gets its handlers
  // through the prototype chain.
  for ( var i in d ) {
    if ( i == "__order__" || i == "__call__" ) continue;
    ord.push( i );
  }
  d.__order__ = ord;
};

// Build patterns for inline matcher.
// Turns every key of the inline table `d` into a regexp alternative and wraps
// `d.__call__` so that the combined pattern is used whenever the caller does
// not pass one explicitly.
Markdown.buildInlinePatterns = function(d) {
  var patterns = [];

  for ( var i in d ) {
    if (i == "__call__") continue;
    // Escape regexp metacharacters (including the ^ and $ anchors) and
    // spell newlines as \n.
    var l = i.replace( /([\\.*+?|()\[\]{}^$])/g, "\\$1" )
             .replace( /\n/g, "\\n" );
    patterns.push( i.length == 1 ? l : "(?:" + l + ")" );
  }

  patterns = patterns.join("|");
  //print("patterns:", uneval( patterns ) );

  var fn = d.__call__;
  d.__call__ = function(text, pattern) {
    if (pattern != undefined)
      return fn.call(this, text, pattern);
    else
      return fn.call(this, text, patterns);
  };
};

// Helper function to make sub-classing a dialect easier.
// Returns a new dialect whose `block` and `inline` tables inherit from those
// of `d` through the prototype chain.
Markdown.subclassDialect = function( d ) {
  function Block() {}
  Block.prototype = d.block;
  function Inline() {}
  Inline.prototype = d.inline;

  return { block: new Block(), inline: new Inline() };
};

Markdown.buildBlockOrder    ( Markdown.dialects.Gruber.block );
Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline );

Markdown.dialects.Maruku = Markdown.subclassDialect( Markdown.dialects.Gruber );
// Maruku document meta data: a first block made only of `Key: Value` lines.
// The pairs are stored (keys lower-cased) on the root node's attributes and
// the block itself produces no output.
Markdown.dialects.Maruku.block.document_meta = function document_meta( block, next ) {
  // we're only interested in the first block
  if ( block.lineNumber > 1 ) return undefined;

  // document_meta blocks consist of one or more lines of `Key: Value\n`
  if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) return undefined;

  // make an attribute node if it doesn't exist
  if ( !extract_attr( this.tree ) ) {
    this.tree.splice( 1, 0, {} );
  }

  // Indexed loop with a declared counter: `for ( p in pairs )` leaked a
  // global `p` (a ReferenceError in strict code) and enumerated array keys.
  var pairs = block.split( /\n/ );
  for ( var i = 0; i < pairs.length; ++i ) {
    var m = pairs[ i ].match( /(\w+):\s*(.*)$/ ),
        key = m[ 1 ].toLowerCase(),
        value = m[ 2 ];

    this.tree[ 1 ][ key ] = value;
  }

  // document_meta produces no content!
  return [];
};

// Maruku block meta data: a `{: #id .class key=value }` hash on the last line
// of a block (applies to that block) or alone in a block (applies to the
// previous node in the tree).
Markdown.dialects.Maruku.block.block_meta = function block_meta( block, next ) {
  // check if the last line of the block is an meta hash
  var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ );
  if ( !m ) return undefined;

  // process the meta hash
  var attr = process_meta_hash( m[ 2 ] ),
      hash,
      a;

  // if we matched ^ then we need to apply meta to the previous block
  if ( m[ 1 ] === "" ) {
    var node = this.tree[ this.tree.length - 1 ];

    // if the node is a string (rather than JsonML), bail. The same goes for
    // anything else that is not an array, e.g. the root attribute object
    // when no block has been emitted yet - it cannot be spliced into.
    if ( typeof node === "string" || !isArray( node ) ) return undefined;

    // create the attribute hash if it doesn't exist
    hash = extract_attr( node );
    if ( !hash ) {
      hash = {};
      node.splice( 1, 0, hash );
    }

    // add the attributes in
    for ( a in attr ) {
      hash[ a ] = attr[ a ];
    }

    // return nothing so the meta hash is removed
    return [];
  }

  // pull the meta hash off the block and process what's left
  var b = block.replace( /\n.*$/, "" ),
      result = this.processBlock( b, [] );

  // The remaining text may legitimately produce no node; there is then
  // nothing to attach the attributes to.
  if ( !result.length || !isArray( result[ 0 ] ) ) return result;

  // get or make the attributes hash
  hash = extract_attr( result[ 0 ] );
  if ( !hash ) {
    hash = {};
    result[ 0 ].splice( 1, 0, hash );
  }

  // attach the attributes to the block
  for ( a in attr ) {
    hash[ a ] = attr[ a ];
  }

  return result;
};
block, next ) { 1092 // one or more terms followed by one or more definitions, in a single block 1093 var tight = /^((?:[^\s:].*\n)+):\s+([^]+)$/, 1094 list = [ "dl" ]; 1095 1096 // see if we're dealing with a tight or loose block 1097 if ( ( m = block.match( tight ) ) ) { 1098 // pull subsequent tight DL blocks out of `next` 1099 var blocks = [ block ]; 1100 while ( next.length && tight.exec( next[ 0 ] ) ) { 1101 blocks.push( next.shift() ); 1102 } 1103 1104 for ( var b = 0; b < blocks.length; ++b ) { 1105 var m = blocks[ b ].match( tight ), 1106 terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ), 1107 defns = m[ 2 ].split( /\n:\s+/ ); 1108 1109 // print( uneval( m ) ); 1110 1111 for ( var i = 0; i < terms.length; ++i ) { 1112 list.push( [ "dt", terms[ i ] ] ); 1113 } 1114 1115 for ( var i = 0; i < defns.length; ++i ) { 1116 // run inline processing over the definition 1117 list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) ); 1118 } 1119 } 1120 } 1121 else { 1122 return undefined; 1123 } 1124 1125 return [ list ]; 1126 } 1127 1128 Markdown.dialects.Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) { 1129 if ( !out.length ) { 1130 return [ 2, "{:" ]; 1131 } 1132 1133 // get the preceeding element 1134 var before = out[ out.length - 1 ]; 1135 1136 if ( typeof before === "string" ) { 1137 return [ 2, "{:" ]; 1138 } 1139 1140 // match a meta hash 1141 var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ ); 1142 1143 // no match, false alarm 1144 if ( !m ) { 1145 return [ 2, "{:" ]; 1146 } 1147 1148 // attach the attributes to the preceeding element 1149 var meta = process_meta_hash( m[ 1 ] ), 1150 attr = extract_attr( before ); 1151 1152 if ( !attr ) { 1153 attr = {}; 1154 before.splice( 1, 0, attr ); 1155 } 1156 1157 for ( var k in meta ) { 1158 attr[ k ] = meta[ k ]; 1159 } 1160 1161 // cut out the string and replace it with nothing 1162 return [ m[ 0 ].length, "" ]; 1163 } 1164 1165 Markdown.buildBlockOrder ( 
Markdown.dialects.Maruku.block ); 1166 Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline ); 1167 1168 var isArray = expose.isArray = function(obj) { 1169 return (obj instanceof Array || typeof obj === "array" || Array.isArray(obj)); 1170 } 1171 1172 function extract_attr( jsonml ) { 1173 return isArray(jsonml) 1174 && jsonml.length > 1 1175 && typeof jsonml[ 1 ] === "object" 1176 && !( isArray(jsonml[ 1 ]) ) 1177 ? jsonml[ 1 ] 1178 : undefined; 1179 } 1180 1181 function process_meta_hash( meta_string ) { 1182 var meta = split_meta_hash( meta_string ), 1183 attr = {}; 1184 1185 for ( var i = 0; i < meta.length; ++i ) { 1186 // id: #foo 1187 if ( /^#/.test( meta[ i ] ) ) { 1188 attr.id = meta[ i ].substring( 1 ); 1189 } 1190 // class: .foo 1191 else if ( /^\./.test( meta[ i ] ) ) { 1192 // if class already exists, append the new one 1193 if ( attr['class'] ) { 1194 attr['class'] = attr['class'] + meta[ i ].replace( /./, " " ); 1195 } 1196 else { 1197 attr['class'] = meta[ i ].substring( 1 ); 1198 } 1199 } 1200 // attribute: foo=bar 1201 else if ( /=/.test( meta[ i ] ) ) { 1202 var s = meta[ i ].split( /=/ ); 1203 attr[ s[ 0 ] ] = s[ 1 ]; 1204 } 1205 } 1206 1207 return attr; 1208 } 1209 1210 function split_meta_hash( meta_string ) { 1211 var meta = meta_string.split( "" ), 1212 parts = [ "" ], 1213 in_quotes = false; 1214 1215 while ( meta.length ) { 1216 var letter = meta.shift(); 1217 switch ( letter ) { 1218 case " " : 1219 // if we're in a quoted section, keep it 1220 if ( in_quotes ) { 1221 parts[ parts.length - 1 ] += letter; 1222 } 1223 // otherwise make a new part 1224 else { 1225 parts.push( "" ); 1226 } 1227 break; 1228 case "'" : 1229 case '"' : 1230 // reverse the quotes and move straight on 1231 in_quotes = !in_quotes; 1232 break; 1233 case "\\" : 1234 // shift off the next letter to be used straight away. 
1235 // it was escaped so we'll keep it whatever it is 1236 letter = meta.shift(); 1237 default : 1238 parts[ parts.length - 1 ] += letter; 1239 break; 1240 } 1241 } 1242 1243 return parts; 1244 } 1245 1246 /** 1247 * renderJsonML( jsonml[, options] ) -> String 1248 * - jsonml (Array): JsonML array to render to XML 1249 * - options (Object): options 1250 * 1251 * Converts the given JsonML into well-formed XML. 1252 * 1253 * The options currently understood are: 1254 * 1255 * - root (Boolean): wether or not the root node should be included in the 1256 * output, or just its children. The default `false` is to not include the 1257 * root itself. 1258 */ 1259 expose.renderJsonML = function( jsonml, options ) { 1260 options = options || {}; 1261 // include the root element in the rendered output? 1262 options.root = options.root || false; 1263 1264 var content = []; 1265 1266 if ( options.root ) { 1267 content.push( render_tree( jsonml ) ); 1268 } 1269 else { 1270 jsonml.shift(); // get rid of the tag 1271 if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { 1272 jsonml.shift(); // get rid of the attributes 1273 } 1274 1275 while ( jsonml.length ) { 1276 content.push( render_tree( jsonml.shift() ) ); 1277 } 1278 } 1279 1280 return content.join( "\n\n" ); 1281 } 1282 1283 function escapeHTML( text ) { 1284 return text.replace( /&/g, "&" ) 1285 .replace( /</g, "<" ) 1286 .replace( />/g, ">" ) 1287 .replace( /"/g, """ ) 1288 .replace( /'/g, "'" ); 1289 } 1290 1291 function render_tree( jsonml ) { 1292 // basic case 1293 if ( typeof jsonml === "string" ) { 1294 return escapeHTML( jsonml ); 1295 } 1296 1297 var tag = jsonml.shift(), 1298 attributes = {}, 1299 content = []; 1300 1301 if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { 1302 attributes = jsonml.shift(); 1303 } 1304 1305 while ( jsonml.length ) { 1306 content.push( arguments.callee( jsonml.shift() ) ); 1307 } 1308 1309 var 
tag_attrs = ""; 1310 for ( var a in attributes ) { 1311 tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"'; 1312 } 1313 1314 // be careful about adding whitespace here for inline elements 1315 return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">"; 1316 } 1317 1318 function convert_tree_to_html( tree, references, options ) { 1319 options = options || {}; 1320 1321 // shallow clone 1322 var jsonml = tree.slice( 0 ); 1323 1324 if (typeof options.preprocessTreeNode === "function") { 1325 jsonml = options.preprocessTreeNode(jsonml, references); 1326 } 1327 1328 // Clone attributes if they exist 1329 var attrs = extract_attr( jsonml ); 1330 if ( attrs ) { 1331 jsonml[ 1 ] = {}; 1332 for ( var i in attrs ) { 1333 jsonml[ 1 ][ i ] = attrs[ i ]; 1334 } 1335 attrs = jsonml[ 1 ]; 1336 } 1337 1338 // basic case 1339 if ( typeof jsonml === "string" ) { 1340 return jsonml; 1341 } 1342 1343 // convert this node 1344 switch ( jsonml[ 0 ] ) { 1345 case "header": 1346 jsonml[ 0 ] = "h" + jsonml[ 1 ].level; 1347 delete jsonml[ 1 ].level; 1348 break; 1349 case "bulletlist": 1350 jsonml[ 0 ] = "ul"; 1351 break; 1352 case "numberlist": 1353 jsonml[ 0 ] = "ol"; 1354 break; 1355 case "listitem": 1356 jsonml[ 0 ] = "li"; 1357 break; 1358 case "para": 1359 jsonml[ 0 ] = "p"; 1360 break; 1361 case "markdown": 1362 jsonml[ 0 ] = "html"; 1363 if ( attrs ) delete attrs.references; 1364 break; 1365 case "code_block": 1366 jsonml[ 0 ] = "pre"; 1367 var i = attrs ? 
2 : 1; 1368 var code = [ "code" ]; 1369 code.push.apply( code, jsonml.splice( i ) ); 1370 jsonml[ i ] = code; 1371 break; 1372 case "inlinecode": 1373 jsonml[ 0 ] = "code"; 1374 break; 1375 case "img": 1376 jsonml[ 1 ].src = jsonml[ 1 ].href; 1377 delete jsonml[ 1 ].href; 1378 break; 1379 case "linebreak": 1380 jsonml[0] = "br"; 1381 break; 1382 case "link": 1383 jsonml[ 0 ] = "a"; 1384 break; 1385 case "link_ref": 1386 jsonml[ 0 ] = "a"; 1387 1388 // grab this ref and clean up the attribute node 1389 var ref = references[ attrs.ref ]; 1390 1391 // if the reference exists, make the link 1392 if ( ref ) { 1393 delete attrs.ref; 1394 1395 // add in the href and title, if present 1396 attrs.href = ref.href; 1397 if ( ref.title ) { 1398 attrs.title = ref.title; 1399 } 1400 1401 // get rid of the unneeded original text 1402 delete attrs.original; 1403 } 1404 // the reference doesn't exist, so revert to plain text 1405 else { 1406 return attrs.original; 1407 } 1408 break; 1409 } 1410 1411 // convert all the children 1412 var i = 1; 1413 1414 // deal with the attribute node, if it exists 1415 if ( attrs ) { 1416 // if there are keys, skip over it 1417 for ( var key in jsonml[ 1 ] ) { 1418 i = 2; 1419 } 1420 // if there aren't, remove it 1421 if ( i === 1 ) { 1422 jsonml.splice( i, 1 ); 1423 } 1424 } 1425 1426 for ( ; i < jsonml.length; ++i ) { 1427 jsonml[ i ] = arguments.callee( jsonml[ i ], references, options ); 1428 } 1429 1430 return jsonml; 1431 } 1432 1433 1434 // merges adjacent text nodes into a single node 1435 function merge_text_nodes( jsonml ) { 1436 // skip the tag name and attribute hash 1437 var i = extract_attr( jsonml ) ? 
2 : 1; 1438 1439 while ( i < jsonml.length ) { 1440 // if it's a string check the next item too 1441 if ( typeof jsonml[ i ] === "string" ) { 1442 if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { 1443 // merge the second string into the first and remove it 1444 jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; 1445 } 1446 else { 1447 ++i; 1448 } 1449 } 1450 // if it's not a string recurse 1451 else { 1452 arguments.callee( jsonml[ i ] ); 1453 ++i; 1454 } 1455 } 1456 } 1457 1458 } )( (function() { 1459 if ( typeof exports === "undefined" ) { 1460 window.markdown = {}; 1461 return window.markdown; 1462 } 1463 else { 1464 return exports; 1465 } 1466 } )() );