werc-1.5.0-tweaks

Tweaks for the werc website builder created by the mad architect Uriel
Log | Files | Refs | README

markdown.pl (35623B)


      1 #!/usr/bin/env perl
      2 #
      3 # Markdown -- A text-to-HTML conversion tool for web writers
      4 #
      5 # Copyright (c) 2004 John Gruber
      6 # <http://daringfireball.net/projects/markdown/>
      7 #
      8 package Markdown;
      9 require 5.006_000;
     10 use strict;
     11 use warnings;
     12 
     13 use Digest::MD5 qw(md5_hex);
     14 use vars qw($VERSION);
     15 $VERSION = '1.0.1';
     16 # Tue 14 Dec 2004
     17 
     18 ## Disabled; causes problems under Perl 5.6.1:
     19 # use utf8;
     20 # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
     21 
     22 
     23 #
     24 # Global default settings:
     25 #
     26 my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
     27 my $g_tab_width = 4;
     28 
     29 
     30 #
     31 # Globals:
     32 #
     33 
     34 # Regex to match balanced [brackets]. See Friedl's
     35 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
     36 my $g_nested_brackets;
     37 $g_nested_brackets = qr{
     38 	(?> 								# Atomic matching
     39 	   [^\[\]]+							# Anything other than brackets
     40 	 | 
     41 	   \[
     42 		 (??{ $g_nested_brackets })		# Recursive set of nested brackets
     43 	   \]
     44 	)*
     45 }x;
     46 
     47 
     48 # Table of hash values for escaped characters:
     49 my %g_escape_table;
     50 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
     51 	$g_escape_table{$char} = md5_hex($char);
     52 }
     53 
     54 
     55 # Global hashes, used by various utility routines
     56 my %g_urls;
     57 my %g_titles;
     58 my %g_html_blocks;
     59 
     60 # Used to track when we're inside an ordered or unordered list
     61 # (see _ProcessListItems() for details):
     62 my $g_list_level = 0;
     63 
     64 
     65 #### Blosxom plug-in interface ##########################################
     66 
     67 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
     68 # which posts Markdown should process, using a "meta-markup: markdown"
     69 # header. If it's set to 0 (the default), Markdown will process all
     70 # entries.
     71 my $g_blosxom_use_meta = 0;
     72 
     73 sub start { 1; }
     74 sub story {
     75 	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
     76 
     77 	if ( (! $g_blosxom_use_meta) or
     78 	     (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
     79 	     ){
     80 			$$body_ref  = Markdown($$body_ref);
     81      }
     82      1;
     83 }
     84 
     85 
     86 #### Movable Type plug-in interface #####################################
     87 eval {require MT};  # Test to see if we're running in MT.
     88 unless ($@) {
     89     require MT;
     90     import  MT;
     91     require MT::Template::Context;
     92     import  MT::Template::Context;
     93 
     94 	eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
     95 	unless ($@) {
     96 		require MT::Plugin;
     97 		import  MT::Plugin;
     98 		my $plugin = new MT::Plugin({
     99 			name => "Markdown",
    100 			description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
    101 			doc_link => 'http://daringfireball.net/projects/markdown/'
    102 		});
    103 		MT->add_plugin( $plugin );
    104 	}
    105 
    106 	MT::Template::Context->add_container_tag(MarkdownOptions => sub {
    107 		my $ctx	 = shift;
    108 		my $args = shift;
    109 		my $builder = $ctx->stash('builder');
    110 		my $tokens = $ctx->stash('tokens');
    111 
    112 		if (defined ($args->{'output'}) ) {
    113 			$ctx->stash('markdown_output', lc $args->{'output'});
    114 		}
    115 
    116 		defined (my $str = $builder->build($ctx, $tokens) )
    117 			or return $ctx->error($builder->errstr);
    118 		$str;		# return value
    119 	});
    120 
    121 	MT->add_text_filter('markdown' => {
    122 		label     => 'Markdown',
    123 		docs      => 'http://daringfireball.net/projects/markdown/',
    124 		on_format => sub {
    125 			my $text = shift;
    126 			my $ctx  = shift;
    127 			my $raw  = 0;
    128 		    if (defined $ctx) {
    129 		    	my $output = $ctx->stash('markdown_output'); 
    130 				if (defined $output  &&  $output =~ m/^html/i) {
    131 					$g_empty_element_suffix = ">";
    132 					$ctx->stash('markdown_output', '');
    133 				}
    134 				elsif (defined $output  &&  $output eq 'raw') {
    135 					$raw = 1;
    136 					$ctx->stash('markdown_output', '');
    137 				}
    138 				else {
    139 					$raw = 0;
    140 					$g_empty_element_suffix = " />";
    141 				}
    142 			}
    143 			$text = $raw ? $text : Markdown($text);
    144 			$text;
    145 		},
    146 	});
    147 
    148 	# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
    149 	my $smartypants;
    150 
    151 	{
    152 		no warnings "once";
    153 		$smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
    154 	}
    155 
    156 	if ($smartypants) {
    157 		MT->add_text_filter('markdown_with_smartypants' => {
    158 			label     => 'Markdown With SmartyPants',
    159 			docs      => 'http://daringfireball.net/projects/markdown/',
    160 			on_format => sub {
    161 				my $text = shift;
    162 				my $ctx  = shift;
    163 				if (defined $ctx) {
    164 					my $output = $ctx->stash('markdown_output'); 
    165 					if (defined $output  &&  $output eq 'html') {
    166 						$g_empty_element_suffix = ">";
    167 					}
    168 					else {
    169 						$g_empty_element_suffix = " />";
    170 					}
    171 				}
    172 				$text = Markdown($text);
    173 				$text = $smartypants->($text, '1');
    174 			},
    175 		});
    176 	}
    177 }
    178 else {
    179 #### BBEdit/command-line text filter interface ##########################
    180 # Needs to be hidden from MT (and Blosxom when running in static mode).
    181 
    182     # We're only using $blosxom::version once; tell Perl not to warn us:
    183 	no warnings 'once';
    184     unless ( defined($blosxom::version) ) {
    185 		use warnings;
    186 
    187 		#### Check for command-line switches: #################
    188 		my %cli_opts;
    189 		use Getopt::Long;
    190 		Getopt::Long::Configure('pass_through');
    191 		GetOptions(\%cli_opts,
    192 			'version',
    193 			'shortversion',
    194 			'html4tags',
    195 		);
    196 		if ($cli_opts{'version'}) {		# Version info
    197 			print "\nThis is Markdown, version $VERSION.\n";
    198 			print "Copyright 2004 John Gruber\n";
    199 			print "http://daringfireball.net/projects/markdown/\n\n";
    200 			exit 0;
    201 		}
    202 		if ($cli_opts{'shortversion'}) {		# Just the version number string.
    203 			print $VERSION;
    204 			exit 0;
    205 		}
    206 		if ($cli_opts{'html4tags'}) {			# Use HTML tag style instead of XHTML
    207 			$g_empty_element_suffix = ">";
    208 		}
    209 
    210 
    211 		#### Process incoming text: ###########################
    212 		my $text;
    213 		{
    214 			local $/;               # Slurp the whole file
    215 			$text = <>;
    216 		}
    217         print Markdown($text);
    218     }
    219 }
    220 
    221 
    222 
    223 sub Markdown {
    224 #
    225 # Main function. The order in which other subs are called here is
    226 # essential. Link and image substitutions need to happen before
    227 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
    228 # and <img> tags get encoded.
    229 #
    230 	my $text = shift;
    231 
    232 	# Clear the global hashes. If we don't clear these, you get conflicts
    233 	# from other articles when generating a page which contains more than
    234 	# one article (e.g. an index page that shows the N most recent
    235 	# articles):
    236 	%g_urls = ();
    237 	%g_titles = ();
    238 	%g_html_blocks = ();
    239 
    240 
    241 	# Standardize line endings:
    242 	$text =~ s{\r\n}{\n}g; 	# DOS to Unix
    243 	$text =~ s{\r}{\n}g; 	# Mac to Unix
    244 
    245 	# Make sure $text ends with a couple of newlines:
    246 	$text .= "\n\n";
    247 
    248 	# Convert all tabs to spaces.
    249 	$text = _Detab($text);
    250 
    251 	# Strip any lines consisting only of spaces and tabs.
    252 	# This makes subsequent regexen easier to write, because we can
    253 	# match consecutive blank lines with /\n+/ instead of something
    254 	# contorted like /[ \t]*\n+/ .
    255 	$text =~ s/^[ \t]+$//mg;
    256 
    257 	# Turn block-level HTML blocks into hash entries
    258 	$text = _HashHTMLBlocks($text);
    259 
    260 	# Strip link definitions, store in hashes.
    261 	$text = _StripLinkDefinitions($text);
    262 
    263 	$text = _RunBlockGamut($text);
    264 
    265 	$text = _UnescapeSpecialChars($text);
    266 
    267 	return $text . "\n";
    268 }
    269 
    270 
    271 sub _StripLinkDefinitions {
    272 #
    273 # Strips link definitions from text, stores the URLs and titles in
    274 # hash references.
    275 #
    276 	my $text = shift;
    277 	my $less_than_tab = $g_tab_width - 1;
    278 
    279 	# Link defs are in the form: ^[id]: url "optional title"
    280 	while ($text =~ s{
    281 						^[ ]{0,$less_than_tab}\[(.+)\]:	# id = $1
    282 						  [ \t]*
    283 						  \n?				# maybe *one* newline
    284 						  [ \t]*
    285 						<?(\S+?)>?			# url = $2
    286 						  [ \t]*
    287 						  \n?				# maybe one newline
    288 						  [ \t]*
    289 						(?:
    290 							(?<=\s)			# lookbehind for whitespace
    291 							["(]
    292 							(.+?)			# title = $3
    293 							[")]
    294 							[ \t]*
    295 						)?	# title is optional
    296 						(?:\n+|\Z)
    297 					}
    298 					{}mx) {
    299 		$g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );	# Link IDs are case-insensitive
    300 		if ($3) {
    301 			$g_titles{lc $1} = $3;
    302 			$g_titles{lc $1} =~ s/"/&quot;/g;
    303 		}
    304 	}
    305 
    306 	return $text;
    307 }
    308 
    309 
    310 sub _HashHTMLBlocks {
    311 	my $text = shift;
    312 	my $less_than_tab = $g_tab_width - 1;
    313 
    314 	# Hashify HTML blocks:
    315 	# We only want to do this for block-level HTML tags, such as headers,
    316 	# lists, and tables. That's because we still want to wrap <p>s around
    317 	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
    318 	# phrase emphasis, and spans. The list of tags we're looking for is
    319 	# hard-coded:
    320 	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
    321 	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
    322 
    323 	# First, look for nested blocks, e.g.:
    324 	# 	<div>
    325 	# 		<div>
    326 	# 		tags for inner block must be indented.
    327 	# 		</div>
    328 	# 	</div>
    329 	#
    330 	# The outermost tags must start at the left margin for this to match, and
    331 	# the inner nested divs must be indented.
    332 	# We need to do this before the next, more liberal match, because the next
    333 	# match will start at the first `<div>` and stop at the first `</div>`.
    334 	$text =~ s{
    335 				(						# save in $1
    336 					^					# start of line  (with /m)
    337 					<($block_tags_a)	# start tag = $2
    338 					\b					# word break
    339 					(.*\n)*?			# any number of lines, minimally matching
    340 					</\2>				# the matching end tag
    341 					[ \t]*				# trailing spaces/tabs
    342 					(?=\n+|\Z)	# followed by a newline or end of document
    343 				)
    344 			}{
    345 				my $key = md5_hex($1);
    346 				$g_html_blocks{$key} = $1;
    347 				"\n\n" . $key . "\n\n";
    348 			}egmx;
    349 
    350 
    351 	#
    352 	# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    353 	#
    354 	$text =~ s{
    355 				(						# save in $1
    356 					^					# start of line  (with /m)
    357 					<($block_tags_b)	# start tag = $2
    358 					\b					# word break
    359 					(.*\n)*?			# any number of lines, minimally matching
    360 					.*</\2>				# the matching end tag
    361 					[ \t]*				# trailing spaces/tabs
    362 					(?=\n+|\Z)	# followed by a newline or end of document
    363 				)
    364 			}{
    365 				my $key = md5_hex($1);
    366 				$g_html_blocks{$key} = $1;
    367 				"\n\n" . $key . "\n\n";
    368 			}egmx;
    369 	# Special case just for <hr />. It was easier to make a special case than
    370 	# to make the other regex more complicated.	
    371 	$text =~ s{
    372 				(?:
    373 					(?<=\n\n)		# Starting after a blank line
    374 					|				# or
    375 					\A\n?			# the beginning of the doc
    376 				)
    377 				(						# save in $1
    378 					[ ]{0,$less_than_tab}
    379 					<(hr)				# start tag = $2
    380 					\b					# word break
    381 					([^<>])*?			# 
    382 					/?>					# the matching end tag
    383 					[ \t]*
    384 					(?=\n{2,}|\Z)		# followed by a blank line or end of document
    385 				)
    386 			}{
    387 				my $key = md5_hex($1);
    388 				$g_html_blocks{$key} = $1;
    389 				"\n\n" . $key . "\n\n";
    390 			}egx;
    391 
    392 	# Special case for standalone HTML comments:
    393 	$text =~ s{
    394 				(?:
    395 					(?<=\n\n)		# Starting after a blank line
    396 					|				# or
    397 					\A\n?			# the beginning of the doc
    398 				)
    399 				(						# save in $1
    400 					[ ]{0,$less_than_tab}
    401 					(?s:
    402 						<!
    403 						(--.*?--\s*)+
    404 						>
    405 					)
    406 					[ \t]*
    407 					(?=\n{2,}|\Z)		# followed by a blank line or end of document
    408 				)
    409 			}{
    410 				my $key = md5_hex($1);
    411 				$g_html_blocks{$key} = $1;
    412 				"\n\n" . $key . "\n\n";
    413 			}egx;
    414 
    415 
    416 	return $text;
    417 }
    418 
    419 
    420 sub _RunBlockGamut {
    421 #
    422 # These are all the transformations that form block-level
    423 # tags like paragraphs, headers, and list items.
    424 #
    425 	my $text = shift;
    426 
    427 	$text = _DoHeaders($text);
    428 
    429 	# Do Horizontal Rules:
    430 	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
    431 	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
    432 	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
    433 
    434 	$text = _DoLists($text);
    435 
    436 	$text = _DoCodeBlocks($text);
    437 
    438 	$text = _DoBlockQuotes($text);
    439 
    440 	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
    441 	# was to escape raw HTML in the original Markdown source. This time,
    442 	# we're escaping the markup we've just created, so that we don't wrap
    443 	# <p> tags around block-level tags.
    444 	$text = _HashHTMLBlocks($text);
    445 
    446 	$text = _FormParagraphs($text);
    447 
    448 	return $text;
    449 }
    450 
    451 
    452 sub _RunSpanGamut {
    453 #
    454 # These are all the transformations that occur *within* block-level
    455 # tags like paragraphs, headers, and list items.
    456 #
    457 	my $text = shift;
    458 
    459 	$text = _DoCodeSpans($text);
    460 
    461 	$text = _EscapeSpecialChars($text);
    462 
    463 	# Process anchor and image tags. Images must come first,
    464 	# because ![foo][f] looks like an anchor.
    465 	$text = _DoImages($text);
    466 	$text = _DoAnchors($text);
    467 
    468 	# Make links out of things like `<http://example.com/>`
    469 	# Must come after _DoAnchors(), because you can use < and >
    470 	# delimiters in inline links like [this](<url>).
    471 	$text = _DoAutoLinks($text);
    472 
    473 	$text = _EncodeAmpsAndAngles($text);
    474 
    475 	$text = _DoItalicsAndBold($text);
    476 
    477 	# Do hard breaks:
    478 	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
    479 
    480 	return $text;
    481 }
    482 
    483 
    484 sub _EscapeSpecialChars {
    485 	my $text = shift;
    486 	my $tokens ||= _TokenizeHTML($text);
    487 
    488 	$text = '';   # rebuild $text from the tokens
    489 # 	my $in_pre = 0;	 # Keep track of when we're inside <pre> or <code> tags.
    490 # 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
    491 
    492 	foreach my $cur_token (@$tokens) {
    493 		if ($cur_token->[0] eq "tag") {
    494 			# Within tags, encode * and _ so they don't conflict
    495 			# with their use in Markdown for italics and strong.
    496 			# We're replacing each such character with its
    497 			# corresponding MD5 checksum value; this is likely
    498 			# overkill, but it should prevent us from colliding
    499 			# with the escape values by accident.
    500 			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
    501 			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
    502 			$text .= $cur_token->[1];
    503 		} else {
    504 			my $t = $cur_token->[1];
    505 			$t = _EncodeBackslashEscapes($t);
    506 			$text .= $t;
    507 		}
    508 	}
    509 	return $text;
    510 }
    511 
    512 
    513 sub _DoAnchors {
    514 #
    515 # Turn Markdown link shortcuts into XHTML <a> tags.
    516 #
    517 	my $text = shift;
    518 
    519 	#
    520 	# First, handle reference-style links: [link text] [id]
    521 	#
    522 	$text =~ s{
    523 		(					# wrap whole match in $1
    524 		  \[
    525 		    ($g_nested_brackets)	# link text = $2
    526 		  \]
    527 
    528 		  [ ]?				# one optional space
    529 		  (?:\n[ ]*)?		# one optional newline followed by spaces
    530 
    531 		  \[
    532 		    (.*?)		# id = $3
    533 		  \]
    534 		)
    535 	}{
    536 		my $result;
    537 		my $whole_match = $1;
    538 		my $link_text   = $2;
    539 		my $link_id     = lc $3;
    540 
    541 		if ($link_id eq "") {
    542 			$link_id = lc $link_text;     # for shortcut links like [this][].
    543 		}
    544 
    545 		if (defined $g_urls{$link_id}) {
    546 			my $url = $g_urls{$link_id};
    547 			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    548 			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    549 			$result = "<a href=\"$url\"";
    550 			if ( defined $g_titles{$link_id} ) {
    551 				my $title = $g_titles{$link_id};
    552 				$title =~ s! \* !$g_escape_table{'*'}!gx;
    553 				$title =~ s!  _ !$g_escape_table{'_'}!gx;
    554 				$result .=  " title=\"$title\"";
    555 			}
    556 			$result .= ">$link_text</a>";
    557 		}
    558 		else {
    559 			$result = $whole_match;
    560 		}
    561 		$result;
    562 	}xsge;
    563 
    564 	#
    565 	# Next, inline-style links: [link text](url "optional title")
    566 	#
    567 	$text =~ s{
    568 		(				# wrap whole match in $1
    569 		  \[
    570 		    ($g_nested_brackets)	# link text = $2
    571 		  \]
    572 		  \(			# literal paren
    573 		  	[ \t]*
    574 			<?(.*?)>?	# href = $3
    575 		  	[ \t]*
    576 			(			# $4
    577 			  (['"])	# quote char = $5
    578 			  (.*?)		# Title = $6
    579 			  \5		# matching quote
    580 			)?			# title is optional
    581 		  \)
    582 		)
    583 	}{
    584 		my $result;
    585 		my $whole_match = $1;
    586 		my $link_text   = $2;
    587 		my $url	  		= $3;
    588 		my $title		= $6;
    589 
    590 		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    591 		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    592 		$result = "<a href=\"$url\"";
    593 
    594 		if (defined $title) {
    595 			$title =~ s/"/&quot;/g;
    596 			$title =~ s! \* !$g_escape_table{'*'}!gx;
    597 			$title =~ s!  _ !$g_escape_table{'_'}!gx;
    598 			$result .=  " title=\"$title\"";
    599 		}
    600 
    601 		$result .= ">$link_text</a>";
    602 
    603 		$result;
    604 	}xsge;
    605 
    606 	return $text;
    607 }
    608 
    609 
    610 sub _DoImages {
    611 #
    612 # Turn Markdown image shortcuts into <img> tags.
    613 #
    614 	my $text = shift;
    615 
    616 	#
    617 	# First, handle reference-style labeled images: ![alt text][id]
    618 	#
    619 	$text =~ s{
    620 		(				# wrap whole match in $1
    621 		  !\[
    622 		    (.*?)		# alt text = $2
    623 		  \]
    624 
    625 		  [ ]?				# one optional space
    626 		  (?:\n[ ]*)?		# one optional newline followed by spaces
    627 
    628 		  \[
    629 		    (.*?)		# id = $3
    630 		  \]
    631 
    632 		)
    633 	}{
    634 		my $result;
    635 		my $whole_match = $1;
    636 		my $alt_text    = $2;
    637 		my $link_id     = lc $3;
    638 
    639 		if ($link_id eq "") {
    640 			$link_id = lc $alt_text;     # for shortcut links like ![this][].
    641 		}
    642 
    643 		$alt_text =~ s/"/&quot;/g;
    644 		if (defined $g_urls{$link_id}) {
    645 			my $url = $g_urls{$link_id};
    646 			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    647 			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    648 			$result = "<img src=\"$url\" alt=\"$alt_text\"";
    649 			if (defined $g_titles{$link_id}) {
    650 				my $title = $g_titles{$link_id};
    651 				$title =~ s! \* !$g_escape_table{'*'}!gx;
    652 				$title =~ s!  _ !$g_escape_table{'_'}!gx;
    653 				$result .=  " title=\"$title\"";
    654 			}
    655 			$result .= $g_empty_element_suffix;
    656 		}
    657 		else {
    658 			# If there's no such link ID, leave intact:
    659 			$result = $whole_match;
    660 		}
    661 
    662 		$result;
    663 	}xsge;
    664 
    665 	#
    666 	# Next, handle inline images:  ![alt text](url "optional title")
    667 	# Don't forget: encode * and _
    668 
    669 	$text =~ s{
    670 		(				# wrap whole match in $1
    671 		  !\[
    672 		    (.*?)		# alt text = $2
    673 		  \]
    674 		  \(			# literal paren
    675 		  	[ \t]*
    676 			<?(\S+?)>?	# src url = $3
    677 		  	[ \t]*
    678 			(			# $4
    679 			  (['"])	# quote char = $5
    680 			  (.*?)		# title = $6
    681 			  \5		# matching quote
    682 			  [ \t]*
    683 			)?			# title is optional
    684 		  \)
    685 		)
    686 	}{
    687 		my $result;
    688 		my $whole_match = $1;
    689 		my $alt_text    = $2;
    690 		my $url	  		= $3;
    691 		my $title		= '';
    692 		if (defined($6)) {
    693 			$title		= $6;
    694 		}
    695 
    696 		$alt_text =~ s/"/&quot;/g;
    697 		$title    =~ s/"/&quot;/g;
    698 		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
    699 		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
    700 		$result = "<img src=\"$url\" alt=\"$alt_text\"";
    701 		if (defined $title) {
    702 			$title =~ s! \* !$g_escape_table{'*'}!gx;
    703 			$title =~ s!  _ !$g_escape_table{'_'}!gx;
    704 			$result .=  " title=\"$title\"";
    705 		}
    706 		$result .= $g_empty_element_suffix;
    707 
    708 		$result;
    709 	}xsge;
    710 
    711 	return $text;
    712 }
    713 
    714 
    715 sub _DoHeaders {
    716 	my $text = shift;
    717 
    718 	# Setext-style headers:
    719 	#	  Header 1
    720 	#	  ========
    721 	#  
    722 	#	  Header 2
    723 	#	  --------
    724 	#
    725 	$text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
    726 		"<h1>"  .  _RunSpanGamut($1)  .  "</h1>\n\n";
    727 	}egmx;
    728 
    729 	$text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
    730 		"<h2>"  .  _RunSpanGamut($1)  .  "</h2>\n\n";
    731 	}egmx;
    732 
    733 
    734 	# atx-style headers:
    735 	#	# Header 1
    736 	#	## Header 2
    737 	#	## Header 2 with closing hashes ##
    738 	#	...
    739 	#	###### Header 6
    740 	#
    741 	$text =~ s{
    742 			^(\#{1,6})	# $1 = string of #'s
    743 			[ \t]*
    744 			(.+?)		# $2 = Header text
    745 			[ \t]*
    746 			\#*			# optional closing #'s (not counted)
    747 			\n+
    748 		}{
    749 			my $h_level = length($1);
    750 			"<h$h_level>"  .  _RunSpanGamut($2)  .  "</h$h_level>\n\n";
    751 		}egmx;
    752 
    753 	return $text;
    754 }
    755 
    756 
    757 sub _DoLists {
    758 #
    759 # Form HTML ordered (numbered) and unordered (bulleted) lists.
    760 #
    761 	my $text = shift;
    762 	my $less_than_tab = $g_tab_width - 1;
    763 
    764 	# Re-usable patterns to match list item bullets and number markers:
    765 	my $marker_ul  = qr/[*+-]/;
    766 	my $marker_ol  = qr/\d+[.]/;
    767 	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
    768 
    769 	# Re-usable pattern to match any entirel ul or ol list:
    770 	my $whole_list = qr{
    771 		(								# $1 = whole list
    772 		  (								# $2
    773 			[ ]{0,$less_than_tab}
    774 			(${marker_any})				# $3 = first list item marker
    775 			[ \t]+
    776 		  )
    777 		  (?s:.+?)
    778 		  (								# $4
    779 			  \z
    780 			|
    781 			  \n{2,}
    782 			  (?=\S)
    783 			  (?!						# Negative lookahead for another list item marker
    784 				[ \t]*
    785 				${marker_any}[ \t]+
    786 			  )
    787 		  )
    788 		)
    789 	}mx;
    790 
    791 	# We use a different prefix before nested lists than top-level lists.
    792 	# See extended comment in _ProcessListItems().
    793 	#
    794 	# Note: There's a bit of duplication here. My original implementation
    795 	# created a scalar regex pattern as the conditional result of the test on
    796 	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
    797 	# substitution once, using the scalar as the pattern. This worked,
    798 	# everywhere except when running under MT on my hosting account at Pair
    799 	# Networks. There, this caused all rebuilds to be killed by the reaper (or
    800 	# perhaps they crashed, but that seems incredibly unlikely given that the
    801 	# same script on the same server ran fine *except* under MT. I've spent
    802 	# more time trying to figure out why this is happening than I'd like to
    803 	# admit. My only guess, backed up by the fact that this workaround works,
    804 	# is that Perl optimizes the substition when it can figure out that the
    805 	# pattern will never change, and when this optimization isn't on, we run
    806 	# afoul of the reaper. Thus, the slightly redundant code to that uses two
    807 	# static s/// patterns rather than one conditional pattern.
    808 
    809 	if ($g_list_level) {
    810 		$text =~ s{
    811 				^
    812 				$whole_list
    813 			}{
    814 				my $list = $1;
    815 				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
    816 				# Turn double returns into triple returns, so that we can make a
    817 				# paragraph for the last item in a list, if necessary:
    818 				$list =~ s/\n{2,}/\n\n\n/g;
    819 				my $result = _ProcessListItems($list, $marker_any);
    820 				$result = "<$list_type>\n" . $result . "</$list_type>\n";
    821 				$result;
    822 			}egmx;
    823 	}
    824 	else {
    825 		$text =~ s{
    826 				(?:(?<=\n\n)|\A\n?)
    827 				$whole_list
    828 			}{
    829 				my $list = $1;
    830 				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
    831 				# Turn double returns into triple returns, so that we can make a
    832 				# paragraph for the last item in a list, if necessary:
    833 				$list =~ s/\n{2,}/\n\n\n/g;
    834 				my $result = _ProcessListItems($list, $marker_any);
    835 				$result = "<$list_type>\n" . $result . "</$list_type>\n";
    836 				$result;
    837 			}egmx;
    838 	}
    839 
    840 
    841 	return $text;
    842 }
    843 
    844 
    845 sub _ProcessListItems {
    846 #
    847 #	Process the contents of a single ordered or unordered list, splitting it
    848 #	into individual list items.
    849 #
    850 
    851 	my $list_str = shift;
    852 	my $marker_any = shift;
    853 
    854 
    855 	# The $g_list_level global keeps track of when we're inside a list.
    856 	# Each time we enter a list, we increment it; when we leave a list,
    857 	# we decrement. If it's zero, we're not in a list anymore.
    858 	#
    859 	# We do this because when we're not inside a list, we want to treat
    860 	# something like this:
    861 	#
    862 	#		I recommend upgrading to version
    863 	#		8. Oops, now this line is treated
    864 	#		as a sub-list.
    865 	#
    866 	# As a single paragraph, despite the fact that the second line starts
    867 	# with a digit-period-space sequence.
    868 	#
    869 	# Whereas when we're inside a list (or sub-list), that line will be
    870 	# treated as the start of a sub-list. What a kludge, huh? This is
    871 	# an aspect of Markdown's syntax that's hard to parse perfectly
    872 	# without resorting to mind-reading. Perhaps the solution is to
    873 	# change the syntax rules such that sub-lists must start with a
    874 	# starting cardinal number; e.g. "1." or "a.".
    875 
    876 	$g_list_level++;
    877 
    878 	# trim trailing blank lines:
    879 	$list_str =~ s/\n{2,}\z/\n/;
    880 
    881 
    882 	$list_str =~ s{
    883 		(\n)?							# leading line = $1
    884 		(^[ \t]*)						# leading whitespace = $2
    885 		($marker_any) [ \t]+			# list marker = $3
    886 		((?s:.+?)						# list item text   = $4
    887 		(\n{1,2}))
    888 		(?= \n* (\z | \2 ($marker_any) [ \t]+))
    889 	}{
    890 		my $item = $4;
    891 		my $leading_line = $1;
    892 		my $leading_space = $2;
    893 
    894 		if ($leading_line or ($item =~ m/\n{2,}/)) {
    895 			$item = _RunBlockGamut(_Outdent($item));
    896 		}
    897 		else {
    898 			# Recursion for sub-lists:
    899 			$item = _DoLists(_Outdent($item));
    900 			chomp $item;
    901 			$item = _RunSpanGamut($item);
    902 		}
    903 
    904 		"<li>" . $item . "</li>\n";
    905 	}egmx;
    906 
    907 	$g_list_level--;
    908 	return $list_str;
    909 }
    910 
    911 
    912 
    913 sub _DoCodeBlocks {
    914 #
    915 #	Process Markdown `<pre><code>` blocks.
    916 #	
    917 
    918 	my $text = shift;
    919 
    920 	$text =~ s{
    921 			(?:\n\n|\A)
    922 			(	            # $1 = the code block -- one or more lines, starting with a space/tab
    923 			  (?:
    924 			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
    925 			    .*\n+
    926 			  )+
    927 			)
    928 			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
    929 		}{
    930 			my $codeblock = $1;
    931 			my $result; # return value
    932 
    933 			$codeblock = _EncodeCode(_Outdent($codeblock));
    934 			$codeblock = _Detab($codeblock);
    935 			$codeblock =~ s/\A\n+//; # trim leading newlines
    936 			$codeblock =~ s/\s+\z//; # trim trailing whitespace
    937 
    938 			$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
    939 
    940 			$result;
    941 		}egmx;
    942 
    943 	return $text;
    944 }
    945 
    946 
    947 sub _DoCodeSpans {
    948 #
    949 # 	*	Backtick quotes are used for <code></code> spans.
    950 # 
    951 # 	*	You can use multiple backticks as the delimiters if you want to
    952 # 		include literal backticks in the code span. So, this input:
    953 #     
    954 #         Just type ``foo `bar` baz`` at the prompt.
    955 #     
    956 #     	Will translate to:
    957 #     
    958 #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    959 #     
    960 #		There's no arbitrary limit to the number of backticks you
    961 #		can use as delimters. If you need three consecutive backticks
    962 #		in your code, use four for delimiters, etc.
    963 #
    964 #	*	You can use spaces to get literal backticks at the edges:
    965 #     
    966 #         ... type `` `bar` `` ...
    967 #     
    968 #     	Turns to:
    969 #     
    970 #         ... type <code>`bar`</code> ...
    971 #
    972 
    973 	my $text = shift;
    974 
    975 	$text =~ s@
    976 			(`+)		# $1 = Opening run of `
    977 			(.+?)		# $2 = The code block
    978 			(?<!`)
    979 			\1			# Matching closer
    980 			(?!`)
    981 		@
    982  			my $c = "$2";
    983  			$c =~ s/^[ \t]*//g; # leading whitespace
    984  			$c =~ s/[ \t]*$//g; # trailing whitespace
    985  			$c = _EncodeCode($c);
    986 			"<code>$c</code>";
    987 		@egsx;
    988 
    989 	return $text;
    990 }
    991 
    992 
    993 sub _EncodeCode {
    994 #
    995 # Encode/escape certain characters inside Markdown code runs.
    996 # The point is that in code, these characters are literals,
    997 # and lose their special Markdown meanings.
    998 #
    999     local $_ = shift;
   1000 
   1001 	# Encode all ampersands; HTML entities are not
   1002 	# entities within a Markdown code span.
   1003 	s/&/&amp;/g;
   1004 
   1005 	# Encode $'s, but only if we're running under Blosxom.
   1006 	# (Blosxom interpolates Perl variables in article bodies.)
   1007 	{
   1008 		no warnings 'once';
   1009     	if (defined($blosxom::version)) {
   1010     		s/\$/&#036;/g;	
   1011     	}
   1012     }
   1013 
   1014 
   1015 	# Do the angle bracket song and dance:
   1016 	s! <  !&lt;!gx;
   1017 	s! >  !&gt;!gx;
   1018 
   1019 	# Now, escape characters that are magic in Markdown:
   1020 	s! \* !$g_escape_table{'*'}!gx;
   1021 	s! _  !$g_escape_table{'_'}!gx;
   1022 	s! {  !$g_escape_table{'{'}!gx;
   1023 	s! }  !$g_escape_table{'}'}!gx;
   1024 	s! \[ !$g_escape_table{'['}!gx;
   1025 	s! \] !$g_escape_table{']'}!gx;
   1026 	s! \\ !$g_escape_table{'\\'}!gx;
   1027 
   1028 	return $_;
   1029 }
   1030 
   1031 
   1032 sub _DoItalicsAndBold {
   1033 	my $text = shift;
   1034 
   1035 	# <strong> must go first:
   1036 	$text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
   1037 		{<strong>$2</strong>}gsx;
   1038 
   1039 	$text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
   1040 		{<em>$2</em>}gsx;
   1041 
   1042 	return $text;
   1043 }
   1044 
   1045 
   1046 sub _DoBlockQuotes {
   1047 	my $text = shift;
   1048 
   1049 	$text =~ s{
   1050 		  (								# Wrap whole match in $1
   1051 			(
   1052 			  ^[ \t]*>[ \t]?			# '>' at the start of a line
   1053 			    .+\n					# rest of the first line
   1054 			  (.+\n)*					# subsequent consecutive lines
   1055 			  \n*						# blanks
   1056 			)+
   1057 		  )
   1058 		}{
   1059 			my $bq = $1;
   1060 			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting
   1061 			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines
   1062 			$bq = _RunBlockGamut($bq);		# recurse
   1063 
   1064 			$bq =~ s/^/  /g;
   1065 			# These leading spaces screw with <pre> content, so we need to fix that:
   1066 			$bq =~ s{
   1067 					(\s*<pre>.+?</pre>)
   1068 				}{
   1069 					my $pre = $1;
   1070 					$pre =~ s/^  //mg;
   1071 					$pre;
   1072 				}egsx;
   1073 
   1074 			"<blockquote>\n$bq\n</blockquote>\n\n";
   1075 		}egmx;
   1076 
   1077 
   1078 	return $text;
   1079 }
   1080 
   1081 
   1082 sub _FormParagraphs {
   1083 #
   1084 #	Params:
   1085 #		$text - string to process with html <p> tags
   1086 #
   1087 	my $text = shift;
   1088 
   1089 	# Strip leading and trailing lines:
   1090 	$text =~ s/\A\n+//;
   1091 	$text =~ s/\n+\z//;
   1092 
   1093 	my @grafs = split(/\n{2,}/, $text);
   1094 
   1095 	#
   1096 	# Wrap <p> tags.
   1097 	#
   1098 	foreach (@grafs) {
   1099 		unless (defined( $g_html_blocks{$_} )) {
   1100 			$_ = _RunSpanGamut($_);
   1101 			s/^([ \t]*)/<p>/;
   1102 			$_ .= "</p>";
   1103 		}
   1104 	}
   1105 
   1106 	#
   1107 	# Unhashify HTML blocks
   1108 	#
   1109 	foreach (@grafs) {
   1110 		if (defined( $g_html_blocks{$_} )) {
   1111 			$_ = $g_html_blocks{$_};
   1112 		}
   1113 	}
   1114 
   1115 	return join "\n\n", @grafs;
   1116 }
   1117 
   1118 
   1119 sub _EncodeAmpsAndAngles {
   1120 # Smart processing for ampersands and angle brackets that need to be encoded.
   1121 
   1122 	my $text = shift;
   1123 
   1124 	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
   1125 	#   http://bumppo.net/projects/amputator/
   1126  	$text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;
   1127 
   1128 	# Encode naked <'s
   1129  	$text =~ s{<(?![a-z/?\$!])}{&lt;}gi;
   1130 
   1131 	return $text;
   1132 }
   1133 
   1134 
   1135 sub _EncodeBackslashEscapes {
   1136 #
   1137 #   Parameter:  String.
   1138 #   Returns:    The string, with after processing the following backslash
   1139 #               escape sequences.
   1140 #
   1141     local $_ = shift;
   1142 
   1143     s! \\\\  !$g_escape_table{'\\'}!gx;		# Must process escaped backslashes first.
   1144     s! \\`   !$g_escape_table{'`'}!gx;
   1145     s! \\\*  !$g_escape_table{'*'}!gx;
   1146     s! \\_   !$g_escape_table{'_'}!gx;
   1147     s! \\\{  !$g_escape_table{'{'}!gx;
   1148     s! \\\}  !$g_escape_table{'}'}!gx;
   1149     s! \\\[  !$g_escape_table{'['}!gx;
   1150     s! \\\]  !$g_escape_table{']'}!gx;
   1151     s! \\\(  !$g_escape_table{'('}!gx;
   1152     s! \\\)  !$g_escape_table{')'}!gx;
   1153     s! \\>   !$g_escape_table{'>'}!gx;
   1154     s! \\\#  !$g_escape_table{'#'}!gx;
   1155     s! \\\+  !$g_escape_table{'+'}!gx;
   1156     s! \\\-  !$g_escape_table{'-'}!gx;
   1157     s! \\\.  !$g_escape_table{'.'}!gx;
   1158     s{ \\!  }{$g_escape_table{'!'}}gx;
   1159 
   1160     return $_;
   1161 }
   1162 
   1163 
   1164 sub _DoAutoLinks {
   1165 	my $text = shift;
   1166 
   1167 	$text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
   1168 
   1169 	# Email addresses: <address@domain.foo>
   1170 	$text =~ s{
   1171 		<
   1172         (?:mailto:)?
   1173 		(
   1174 			[-.\w]+
   1175 			\@
   1176 			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
   1177 		)
   1178 		>
   1179 	}{
   1180 		_EncodeEmailAddress( _UnescapeSpecialChars($1) );
   1181 	}egix;
   1182 
   1183 	return $text;
   1184 }
   1185 
   1186 
   1187 sub _EncodeEmailAddress {
   1188 #
   1189 #	Input: an email address, e.g. "foo@example.com"
   1190 #
   1191 #	Output: the email address as a mailto link, with each character
   1192 #		of the address encoded as either a decimal or hex entity, in
   1193 #		the hopes of foiling most address harvesting spam bots. E.g.:
   1194 #
   1195 #	  <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
   1196 #       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
   1197 #       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
   1198 #
   1199 #	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
   1200 #	mailing list: <http://tinyurl.com/yu7ue>
   1201 #
   1202 
   1203 	my $addr = shift;
   1204 
   1205 	srand;
   1206 	my @encode = (
   1207 		sub { '&#' .                 ord(shift)   . ';' },
   1208 		sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
   1209 		sub {                            shift          },
   1210 	);
   1211 
   1212 	$addr = "mailto:" . $addr;
   1213 
   1214 	$addr =~ s{(.)}{
   1215 		my $char = $1;
   1216 		if ( $char eq '@' ) {
   1217 			# this *must* be encoded. I insist.
   1218 			$char = $encode[int rand 1]->($char);
   1219 		} elsif ( $char ne ':' ) {
   1220 			# leave ':' alone (to spot mailto: later)
   1221 			my $r = rand;
   1222 			# roughly 10% raw, 45% hex, 45% dec
   1223 			$char = (
   1224 				$r > .9   ?  $encode[2]->($char)  :
   1225 				$r < .45  ?  $encode[1]->($char)  :
   1226 							 $encode[0]->($char)
   1227 			);
   1228 		}
   1229 		$char;
   1230 	}gex;
   1231 
   1232 	$addr = qq{<a href="$addr">$addr</a>};
   1233 	$addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
   1234 
   1235 	return $addr;
   1236 }
   1237 
   1238 
   1239 sub _UnescapeSpecialChars {
   1240 #
   1241 # Swap back in all the special characters we've hidden.
   1242 #
   1243 	my $text = shift;
   1244 
   1245 	while( my($char, $hash) = each(%g_escape_table) ) {
   1246 		$text =~ s/$hash/$char/g;
   1247 	}
   1248     return $text;
   1249 }
   1250 
   1251 
   1252 sub _TokenizeHTML {
   1253 #
   1254 #   Parameter:  String containing HTML markup.
   1255 #   Returns:    Reference to an array of the tokens comprising the input
   1256 #               string. Each token is either a tag (possibly with nested,
   1257 #               tags contained therein, such as <a href="<MTFoo>">, or a
   1258 #               run of text between tags. Each element of the array is a
   1259 #               two-element array; the first is either 'tag' or 'text';
   1260 #               the second is the actual value.
   1261 #
   1262 #
   1263 #   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
   1264 #       <http://www.bradchoate.com/past/mtregex.php>
   1265 #
   1266 
   1267     my $str = shift;
   1268     my $pos = 0;
   1269     my $len = length $str;
   1270     my @tokens;
   1271 
   1272     my $depth = 6;
   1273     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
   1274     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
   1275                    (?s: <\? .*? \?> ) |              # processing instruction
   1276                    $nested_tags/ix;                   # nested tags
   1277 
   1278     while ($str =~ m/($match)/g) {
   1279         my $whole_tag = $1;
   1280         my $sec_start = pos $str;
   1281         my $tag_start = $sec_start - length $whole_tag;
   1282         if ($pos < $tag_start) {
   1283             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
   1284         }
   1285         push @tokens, ['tag', $whole_tag];
   1286         $pos = pos $str;
   1287     }
   1288     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
   1289     \@tokens;
   1290 }
   1291 
   1292 
   1293 sub _Outdent {
   1294 #
   1295 # Remove one level of line-leading tabs or spaces
   1296 #
   1297 	my $text = shift;
   1298 
   1299 	$text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
   1300 	return $text;
   1301 }
   1302 
   1303 
   1304 sub _Detab {
   1305 #
   1306 # Cribbed from a post by Bart Lateur:
   1307 # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
   1308 #
   1309 	my $text = shift;
   1310 
   1311 	$text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
   1312 	return $text;
   1313 }
   1314 
   1315 
   1316 1;
   1317 
   1318 __END__
   1319 
   1320 
   1321 =pod
   1322 
   1323 =head1 NAME
   1324 
   1325 B<Markdown>
   1326 
   1327 
   1328 =head1 SYNOPSIS
   1329 
   1330 B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
   1331     [ I<file> ... ]
   1332 
   1333 
   1334 =head1 DESCRIPTION
   1335 
   1336 Markdown is a text-to-HTML filter; it translates an easy-to-read /
   1337 easy-to-write structured text format into HTML. Markdown's text format
   1338 is most similar to that of plain text email, and supports features such
   1339 as headers, *emphasis*, code blocks, blockquotes, and links.
   1340 
   1341 Markdown's syntax is designed not as a generic markup language, but
   1342 specifically to serve as a front-end to (X)HTML. You can  use span-level
   1343 HTML tags anywhere in a Markdown document, and you can use block level
   1344 HTML tags (like <div> and <table> as well).
   1345 
   1346 For more information about Markdown's syntax, see:
   1347 
   1348     http://daringfireball.net/projects/markdown/
   1349 
   1350 
   1351 =head1 OPTIONS
   1352 
   1353 Use "--" to end switch parsing. For example, to open a file named "-z", use:
   1354 
   1355 	Markdown.pl -- -z
   1356 
   1357 =over 4
   1358 
   1359 
   1360 =item B<--html4tags>
   1361 
   1362 Use HTML 4 style for empty element tags, e.g.:
   1363 
   1364     <br>
   1365 
   1366 instead of Markdown's default XHTML style tags, e.g.:
   1367 
   1368     <br />
   1369 
   1370 
   1371 =item B<-v>, B<--version>
   1372 
   1373 Display Markdown's version number and copyright information.
   1374 
   1375 
   1376 =item B<-s>, B<--shortversion>
   1377 
   1378 Display the short-form version number.
   1379 
   1380 
   1381 =back
   1382 
   1383 
   1384 
   1385 =head1 BUGS
   1386 
   1387 To file bug reports or feature requests (other than topics listed in the
   1388 Caveats section above) please send email to:
   1389 
   1390     support@daringfireball.net
   1391 
   1392 Please include with your report: (1) the example input; (2) the output
   1393 you expected; (3) the output Markdown actually produced.
   1394 
   1395 
   1396 =head1 VERSION HISTORY
   1397 
   1398 See the readme file for detailed release notes for this version.
   1399 
   1400 1.0.1 - 14 Dec 2004
   1401 
   1402 1.0 - 28 Aug 2004
   1403 
   1404 
   1405 =head1 AUTHOR
   1406 
   1407     John Gruber
   1408     http://daringfireball.net
   1409 
   1410     PHP port and other contributions by Michel Fortin
   1411     http://michelf.com
   1412 
   1413 
   1414 =head1 COPYRIGHT AND LICENSE
   1415 
   1416 Copyright (c) 2003-2004 John Gruber   
   1417 <http://daringfireball.net/>   
   1418 All rights reserved.
   1419 
   1420 Redistribution and use in source and binary forms, with or without
   1421 modification, are permitted provided that the following conditions are
   1422 met:
   1423 
   1424 * Redistributions of source code must retain the above copyright notice,
   1425   this list of conditions and the following disclaimer.
   1426 
   1427 * Redistributions in binary form must reproduce the above copyright
   1428   notice, this list of conditions and the following disclaimer in the
   1429   documentation and/or other materials provided with the distribution.
   1430 
   1431 * Neither the name "Markdown" nor the names of its contributors may
   1432   be used to endorse or promote products derived from this software
   1433   without specific prior written permission.
   1434 
   1435 This software is provided by the copyright holders and contributors "as
   1436 is" and any express or implied warranties, including, but not limited
   1437 to, the implied warranties of merchantability and fitness for a
   1438 particular purpose are disclaimed. In no event shall the copyright owner
   1439 or contributors be liable for any direct, indirect, incidental, special,
   1440 exemplary, or consequential damages (including, but not limited to,
   1441 procurement of substitute goods or services; loss of use, data, or
   1442 profits; or business interruption) however caused and on any theory of
   1443 liability, whether in contract, strict liability, or tort (including
   1444 negligence or otherwise) arising in any way out of the use of this
   1445 software, even if advised of the possibility of such damage.
   1446 
   1447 =cut