From ac9a3661754808e186f36b58dedf62b394fa06f2 Mon Sep 17 00:00:00 2001 From: Nicolas Boisselier Date: Tue, 9 Jun 2015 16:57:10 +0100 Subject: [PATCH] parsedown/UPDATE.sh --- lib/php/parsedown/Parsedown.php | 932 +++++++++++++++++--------------- lib/php/parsedown/README.md | 34 +- lib/php/parsedown/UPDATE.sh | 8 + 3 files changed, 535 insertions(+), 439 deletions(-) create mode 100755 lib/php/parsedown/UPDATE.sh diff --git a/lib/php/parsedown/Parsedown.php b/lib/php/parsedown/Parsedown.php index b622d158..55640fb2 100755 --- a/lib/php/parsedown/Parsedown.php +++ b/lib/php/parsedown/Parsedown.php @@ -15,28 +15,19 @@ class Parsedown { - # - # Philosophy + # ~ - # Parsedown recognises that the Markdown syntax is optimised for humans so - # it tries to read like one. It goes through text line by line. It looks at - # how lines start to identify blocks. It looks for special characters to - # identify inline elements. + const version = '1.5.3'; - # # ~ function text($text) { # make sure no definitions are set - $this->Definitions = array(); + $this->DefinitionData = array(); # standardize line breaks - $text = str_replace("\r\n", "\n", $text); - $text = str_replace("\r", "\n", $text); - - # replace tabs with spaces - $text = str_replace("\t", ' ', $text); + $text = str_replace(array("\r\n", "\r"), "\n", $text); # remove surrounding line breaks $text = trim($text, "\n"); @@ -57,8 +48,6 @@ class Parsedown # Setters # - private $breaksEnabled; - function setBreaksEnabled($breaksEnabled) { $this->breaksEnabled = $breaksEnabled; @@ -66,7 +55,7 @@ class Parsedown return $this; } - private $markupEscaped; + protected $breaksEnabled; function setMarkupEscaped($markupEscaped) { @@ -75,15 +64,26 @@ class Parsedown return $this; } + protected $markupEscaped; + + function setUrlsLinked($urlsLinked) + { + $this->urlsLinked = $urlsLinked; + + return $this; + } + + protected $urlsLinked = true; + # # Lines # protected $BlockTypes = array( - '#' => array('Atx'), + '#' => array('Header'), 
'*' => array('Rule', 'List'), '+' => array('List'), - '-' => array('Setext', 'Table', 'Rule', 'List'), + '-' => array('SetextHeader', 'Table', 'Rule', 'List'), '0' => array('List'), '1' => array('List'), '2' => array('List'), @@ -96,8 +96,9 @@ class Parsedown '9' => array('List'), ':' => array('Table'), '<' => array('Comment', 'Markup'), - '=' => array('Setext'), + '=' => array('SetextHeader'), '>' => array('Quote'), + '[' => array('Reference'), '_' => array('Rule'), '`' => array('FencedCode'), '|' => array('Table'), @@ -113,7 +114,7 @@ class Parsedown # ~ protected $unmarkedBlockTypes = array( - 'CodeBlock', + 'Code', ); # @@ -136,6 +137,23 @@ class Parsedown continue; } + if (strpos($line, "\t") !== false) + { + $parts = explode("\t", $line); + + $line = $parts[0]; + + unset($parts[0]); + + foreach ($parts as $part) + { + $shortage = 4 - mb_strlen($line, 'utf-8') % 4; + + $line .= str_repeat(' ', $shortage); + $line .= $part; + } + } + $indent = 0; while (isset($line[$indent]) and $line[$indent] === ' ') @@ -153,7 +171,7 @@ class Parsedown if (isset($CurrentBlock['incomplete'])) { - $Block = $this->{'addTo'.$CurrentBlock['type']}($Line, $CurrentBlock); + $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); if (isset($Block)) { @@ -163,9 +181,9 @@ class Parsedown } else { - if (method_exists($this, 'complete'.$CurrentBlock['type'])) + if (method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) { - $CurrentBlock = $this->{'complete'.$CurrentBlock['type']}($CurrentBlock); + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); } unset($CurrentBlock['incomplete']); @@ -176,21 +194,6 @@ class Parsedown $marker = $text[0]; - if (isset($this->DefinitionTypes[$marker])) - { - foreach ($this->DefinitionTypes[$marker] as $definitionType) - { - $Definition = $this->{'identify'.$definitionType}($Line, $CurrentBlock); - - if (isset($Definition)) - { - $this->Definitions[$definitionType][$Definition['id']] = 
$Definition['data']; - - continue 2; - } - } - } - # ~ $blockTypes = $this->unmarkedBlockTypes; @@ -208,7 +211,7 @@ class Parsedown foreach ($blockTypes as $blockType) { - $Block = $this->{'identify'.$blockType}($Line, $CurrentBlock); + $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); if (isset($Block)) { @@ -216,12 +219,12 @@ class Parsedown if ( ! isset($Block['identified'])) { - $Elements []= $CurrentBlock['element']; + $Blocks []= $CurrentBlock; $Block['identified'] = true; } - if (method_exists($this, 'addTo'.$blockType)) + if (method_exists($this, 'block'.$blockType.'Continue')) { $Block['incomplete'] = true; } @@ -240,9 +243,9 @@ class Parsedown } else { - $Elements []= $CurrentBlock['element']; + $Blocks []= $CurrentBlock; - $CurrentBlock = $this->buildParagraph($Line); + $CurrentBlock = $this->paragraph($Line); $CurrentBlock['identified'] = true; } @@ -250,59 +253,49 @@ class Parsedown # ~ - if (isset($CurrentBlock['incomplete']) and method_exists($this, 'complete'.$CurrentBlock['type'])) + if (isset($CurrentBlock['incomplete']) and method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) { - $CurrentBlock = $this->{'complete'.$CurrentBlock['type']}($CurrentBlock); + $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); } # ~ - $Elements []= $CurrentBlock['element']; - - unset($Elements[0]); + $Blocks []= $CurrentBlock; - # ~ - - $markup = $this->elements($Elements); + unset($Blocks[0]); # ~ - return $markup; - } - - # - # Atx + $markup = ''; - protected function identifyAtx($Line) - { - if (isset($Line['text'][1])) + foreach ($Blocks as $Block) { - $level = 1; - - while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') + if (isset($Block['hidden'])) { - $level ++; + continue; } - $text = trim($Line['text'], '# '); + $markup .= "\n"; + $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); + } - $Block = array( - 'element' => array( - 'name' => 'h' . 
min(6, $level), - 'text' => $text, - 'handler' => 'line', - ), - ); + $markup .= "\n"; - return $Block; - } + # ~ + + return $markup; } # # Code - protected function identifyCodeBlock($Line) + protected function blockCode($Line, $Block = null) { + if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + { + return; + } + if ($Line['indent'] >= 4) { $text = substr($Line['body'], 4); @@ -322,7 +315,7 @@ class Parsedown } } - protected function addToCodeBlock($Line, $Block) + protected function blockCodeContinue($Line, $Block) { if ($Line['indent'] >= 4) { @@ -343,7 +336,7 @@ class Parsedown } } - protected function completeCodeBlock($Block) + protected function blockCodeComplete($Block) { $text = $Block['element']['text']['text']; @@ -357,7 +350,7 @@ class Parsedown # # Comment - protected function identifyComment($Line) + protected function blockComment($Line) { if ($this->markupEscaped) { @@ -367,7 +360,7 @@ class Parsedown if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') { $Block = array( - 'element' => $Line['body'], + 'markup' => $Line['body'], ); if (preg_match('/-->$/', $Line['text'])) @@ -379,14 +372,14 @@ class Parsedown } } - protected function addToComment($Line, array $Block) + protected function blockCommentContinue($Line, array $Block) { if (isset($Block['closed'])) { return; } - $Block['element'] .= "\n" . $Line['body']; + $Block['markup'] .= "\n" . 
$Line['body']; if (preg_match('/-->$/', $Line['text'])) { @@ -399,7 +392,7 @@ class Parsedown # # Fenced Code - protected function identifyFencedCode($Line) + protected function blockFencedCode($Line) { if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) { @@ -430,7 +423,7 @@ class Parsedown } } - protected function addToFencedCode($Line, $Block) + protected function blockFencedCodeContinue($Line, $Block) { if (isset($Block['complete'])) { @@ -458,7 +451,7 @@ class Parsedown return $Block; } - protected function completeFencedCode($Block) + protected function blockFencedCodeComplete($Block) { $text = $Block['element']['text']['text']; @@ -469,10 +462,43 @@ class Parsedown return $Block; } + # + # Header + + protected function blockHeader($Line) + { + if (isset($Line['text'][1])) + { + $level = 1; + + while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') + { + $level ++; + } + + if ($level > 6) + { + return; + } + + $text = trim($Line['text'], '# '); + + $Block = array( + 'element' => array( + 'name' => 'h' . min(6, $level), + 'text' => $text, + 'handler' => 'line', + ), + ); + + return $Block; + } + } + # # List - protected function identifyList($Line) + protected function blockList($Line) { list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); @@ -501,9 +527,9 @@ class Parsedown } } - protected function addToList($Line, array $Block) + protected function blockListContinue($Line, array $Block) { - if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'[ ]+(.*)/', $Line['text'], $matches)) + if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) { if (isset($Block['interrupted'])) { @@ -514,11 +540,13 @@ class Parsedown unset($Block['li']); + $text = isset($matches[1]) ? 
$matches[1] : ''; + $Block['li'] = array( 'name' => 'li', 'handler' => 'li', 'text' => array( - $matches[1], + $text, ), ); @@ -527,6 +555,11 @@ class Parsedown return $Block; } + if ($Line['text'][0] === '[' and $this->blockReference($Line)) + { + return $Block; + } + if ( ! isset($Block['interrupted'])) { $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); @@ -553,7 +586,7 @@ class Parsedown # # Quote - protected function identifyQuote($Line) + protected function blockQuote($Line) { if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) { @@ -569,7 +602,7 @@ class Parsedown } } - protected function addToQuote($Line, array $Block) + protected function blockQuoteContinue($Line, array $Block) { if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) { @@ -596,9 +629,9 @@ class Parsedown # # Rule - protected function identifyRule($Line) + protected function blockRule($Line) { - if (preg_match('/^(['.$Line['text'][0].'])([ ]{0,2}\1){2,}[ ]*$/', $Line['text'])) + if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) { $Block = array( 'element' => array( @@ -613,7 +646,7 @@ class Parsedown # # Setext - protected function identifySetext($Line, array $Block = null) + protected function blockSetextHeader($Line, array $Block = null) { if ( ! 
isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) { @@ -631,14 +664,14 @@ class Parsedown # # Markup - protected function identifyMarkup($Line) + protected function blockMarkup($Line) { if ($this->markupEscaped) { return; } - if (preg_match('/^<(\w[\w\d]*)(?:[ ][^>]*)?(\/?)[ ]*>/', $Line['text'], $matches)) + if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) { if (in_array($matches[1], $this->textLevelElements)) { @@ -646,36 +679,54 @@ class Parsedown } $Block = array( - 'element' => $Line['body'], + 'name' => $matches[1], + 'depth' => 0, + 'markup' => $Line['text'], ); - if ($matches[2] or in_array($matches[1], $this->voidElements) or preg_match('/<\/'.$matches[1].'>[ ]*$/', $Line['text'])) + $length = strlen($matches[0]); + + $remainder = substr($Line['text'], $length); + + if (trim($remainder) === '') { - $Block['closed'] = true; + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + $Block['closed'] = true; + + $Block['void'] = true; + } } else { - $Block['depth'] = 0; - $Block['name'] = $matches[1]; + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + return; + } + + if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) + { + $Block['closed'] = true; + } } return $Block; } } - protected function addToMarkup($Line, array $Block) + protected function blockMarkupContinue($Line, array $Block) { if (isset($Block['closed'])) { return; } - if (preg_match('/<'.$Block['name'].'([ ].*[\'"])?[ ]*>/', $Line['text'])) # opening tag + if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open { $Block['depth'] ++; } - if (stripos($Line['text'], '') !== false) # closing tag + if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close { if ($Block['depth'] > 0) { @@ -689,20 +740,49 @@ class Parsedown if (isset($Block['interrupted'])) { - $Block['element'] .= "\n"; + 
$Block['markup'] .= "\n"; unset($Block['interrupted']); } - $Block['element'] .= "\n".$Line['body']; + $Block['markup'] .= "\n".$Line['body']; return $Block; } + # + # Reference + + protected function blockReference($Line) + { + if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) + { + $id = strtolower($matches[1]); + + $Data = array( + 'url' => $matches[2], + 'title' => null, + ); + + if (isset($matches[3])) + { + $Data['title'] = $matches[3]; + } + + $this->DefinitionData['Reference'][$id] = $Data; + + $Block = array( + 'hidden' => true, + ); + + return $Block; + } + } + # # Table - protected function identifyTable($Line, array $Block = null) + protected function blockTable($Line, array $Block = null) { if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) { @@ -736,7 +816,7 @@ class Parsedown $alignment = 'left'; } - if (substr($dividerCell, -1) === ':') + if (substr($dividerCell, - 1) === ':') { $alignment = $alignment === 'left' ? 
'center' : 'right'; } @@ -770,7 +850,7 @@ class Parsedown $alignment = $alignments[$index]; $HeaderElement['attributes'] = array( - 'align' => $alignment, + 'style' => 'text-align: '.$alignment.';', ); } @@ -809,8 +889,13 @@ class Parsedown } } - protected function addToTable($Line, array $Block) + protected function blockTableContinue($Line, array $Block) { + if (isset($Block['interrupted'])) + { + return; + } + if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) { $Elements = array(); @@ -820,9 +905,9 @@ class Parsedown $row = trim($row); $row = trim($row, '|'); - $cells = explode('|', $row); + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); - foreach ($cells as $index => $cell) + foreach ($matches[0] as $index => $cell) { $cell = trim($cell); @@ -835,7 +920,7 @@ class Parsedown if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( - 'align' => $Block['alignments'][$index], + 'style' => 'text-align: '.$Block['alignments'][$index].';', ); } @@ -854,35 +939,11 @@ class Parsedown } } - # - # Definitions - # - - protected function identifyReference($Line) - { - if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) - { - $Definition = array( - 'id' => strtolower($matches[1]), - 'data' => array( - 'url' => $matches[2], - ), - ); - - if (isset($matches[3])) - { - $Definition['data']['title'] = $matches[3]; - } - - return $Definition; - } - } - # # ~ # - protected function buildParagraph($Line) + protected function paragraph($Line) { $Block = array( 'element' => array( @@ -896,93 +957,27 @@ class Parsedown } # - # ~ - # - - protected function element(array $Element) - { - $markup = '<'.$Element['name']; - - if (isset($Element['attributes'])) - { - foreach ($Element['attributes'] as $name => $value) - { - $markup .= ' '.$name.'="'.$value.'"'; - } - } - - if (isset($Element['text'])) - { - $markup .= '>'; - - if (isset($Element['handler'])) - { - $markup .= 
$this->$Element['handler']($Element['text']); - } - else - { - $markup .= $Element['text']; - } - - $markup .= ''; - } - else - { - $markup .= ' />'; - } - - return $markup; - } - - protected function elements(array $Elements) - { - $markup = ''; - - foreach ($Elements as $Element) - { - if ($Element === null) - { - continue; - } - - $markup .= "\n"; - - if (is_string($Element)) # because of Markup - { - $markup .= $Element; - - continue; - } - - $markup .= $this->element($Element); - } - - $markup .= "\n"; - - return $markup; - } - - # - # Spans + # Inline Elements # - protected $SpanTypes = array( - '!' => array('Link'), # ? - '&' => array('Ampersand'), + protected $InlineTypes = array( + '"' => array('SpecialCharacter'), + '!' => array('Image'), + '&' => array('SpecialCharacter'), '*' => array('Emphasis'), - '/' => array('Url'), - '<' => array('UrlTag', 'EmailTag', 'Tag', 'LessThan'), - '>' => array('GreaterThan'), + ':' => array('Url'), + '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), + '>' => array('SpecialCharacter'), '[' => array('Link'), '_' => array('Emphasis'), - '`' => array('InlineCode'), + '`' => array('Code'), '~' => array('Strikethrough'), '\\' => array('EscapeSequence'), ); # ~ - protected $spanMarkerList = '*_!&[<>/`~\\'; + protected $inlineMarkerList = '!"*_&[:<>`~\\'; # # ~ @@ -992,64 +987,58 @@ class Parsedown { $markup = ''; - $remainder = $text; + $unexaminedText = $text; $markerPosition = 0; - while ($excerpt = strpbrk($remainder, $this->spanMarkerList)) + while ($excerpt = strpbrk($unexaminedText, $this->inlineMarkerList)) { $marker = $excerpt[0]; - $markerPosition += strpos($remainder, $marker); + $markerPosition += strpos($unexaminedText, $marker); $Excerpt = array('text' => $excerpt, 'context' => $text); - foreach ($this->SpanTypes[$marker] as $spanType) + foreach ($this->InlineTypes[$marker] as $inlineType) { - $handler = 'identify'.$spanType; + $Inline = $this->{'inline'.$inlineType}($Excerpt); - $Span = 
$this->$handler($Excerpt); - - if ( ! isset($Span)) + if ( ! isset($Inline)) { continue; } - # The identified span can be ahead of the marker. - - if (isset($Span['position']) and $Span['position'] > $markerPosition) + if (isset($Inline['position']) and $Inline['position'] > $markerPosition) # position is ahead of marker { continue; } - # Spans that start at the position of their marker don't have to set a position. - - if ( ! isset($Span['position'])) + if ( ! isset($Inline['position'])) { - $Span['position'] = $markerPosition; + $Inline['position'] = $markerPosition; } - $plainText = substr($text, 0, $Span['position']); + $unmarkedText = substr($text, 0, $Inline['position']); - $markup .= $this->readPlainText($plainText); + $markup .= $this->unmarkedText($unmarkedText); - $markup .= isset($Span['markup']) ? $Span['markup'] : $this->element($Span['element']); + $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']); - $text = substr($text, $Span['position'] + $Span['extent']); + $text = substr($text, $Inline['position'] + $Inline['extent']); - $remainder = $text; + $unexaminedText = $text; $markerPosition = 0; continue 2; } - $remainder = substr($excerpt, 1); + $unexaminedText = substr($excerpt, 1); $markerPosition ++; } - $markup .= $this->readPlainText($text); + $markup .= $this->unmarkedText($text); return $markup; } @@ -1058,63 +1047,83 @@ class Parsedown # ~ # - protected function identifyUrl($Excerpt) + protected function inlineCode($Excerpt) { - if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '/') - { - return; - } + $marker = $Excerpt['text'][0]; - if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) + if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? 
strlen($matches[0][0]), - 'position' => $matches[0][1], + 'extent' => strlen($matches[0]), 'element' => array( - 'name' => 'a', - 'text' => $url, - 'attributes' => array( - 'href' => $url, - ), + 'name' => 'code', + 'text' => $text, ), ); } } - protected function identifyAmpersand($Excerpt) + protected function inlineEmailTag($Excerpt) { - if ( ! preg_match('/^&#?\w+;/', $Excerpt['text'])) + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) { + $url = $matches[1]; + + if ( ! isset($matches[2])) + { + $url = 'mailto:' . $url; + } + return array( - 'markup' => '&', - 'extent' => 1, + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => 'a', + 'text' => $matches[1], + 'attributes' => array( + 'href' => $url, + ), + ), ); } } - protected function identifyStrikethrough($Excerpt) + protected function inlineEmphasis($Excerpt) { if ( ! isset($Excerpt['text'][1])) { return; } - if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches)) + $marker = $Excerpt['text'][0]; + + if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches)) { - return array( - 'extent' => strlen($matches[0]), - 'element' => array( - 'name' => 'del', - 'text' => $matches[1], - 'handler' => 'line', - ), - ); + $emphasis = 'strong'; } + elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) + { + $emphasis = 'em'; + } + else + { + return; + } + + return array( + 'extent' => strlen($matches[0]), + 'element' => array( + 'name' => $emphasis, + 'handler' => 'line', + 'text' => $matches[1], + ), + ); } - protected function identifyEscapeSequence($Excerpt) + protected function inlineEscapeSequence($Excerpt) { if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { @@ -1125,234 +1134,306 @@ class Parsedown } } - protected function identifyLessThan() + protected function 
inlineImage($Excerpt) { - return array( - 'markup' => '<', - 'extent' => 1, + if ( ! isset($Excerpt['text'][1]) or $Excerpt['text'][1] !== '[') + { + return; + } + + $Excerpt['text']= substr($Excerpt['text'], 1); + + $Link = $this->inlineLink($Excerpt); + + if ($Link === null) + { + return; + } + + $Inline = array( + 'extent' => $Link['extent'] + 1, + 'element' => array( + 'name' => 'img', + 'attributes' => array( + 'src' => $Link['element']['attributes']['href'], + 'alt' => $Link['element']['text'], + ), + ), ); + + $Inline['element']['attributes'] += $Link['element']['attributes']; + + unset($Inline['element']['attributes']['href']); + + return $Inline; } - protected function identifyGreaterThan() + protected function inlineLink($Excerpt) { + $Element = array( + 'name' => 'a', + 'handler' => 'line', + 'text' => null, + 'attributes' => array( + 'href' => null, + 'title' => null, + ), + ); + + $extent = 0; + + $remainder = $Excerpt['text']; + + if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) + { + $Element['text'] = $matches[1]; + + $extent += strlen($matches[0]); + + $remainder = substr($remainder, $extent); + } + else + { + return; + } + + if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches)) + { + $Element['attributes']['href'] = $matches[1]; + + if (isset($matches[2])) + { + $Element['attributes']['title'] = substr($matches[2], 1, - 1); + } + + $extent += strlen($matches[0]); + } + else + { + if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) + { + $definition = strlen($matches[1]) ? $matches[1] : $Element['text']; + $definition = strtolower($definition); + + $extent += strlen($matches[0]); + } + else + { + $definition = strtolower($Element['text']); + } + + if ( ! 
isset($this->DefinitionData['Reference'][$definition])) + { + return; + } + + $Definition = $this->DefinitionData['Reference'][$definition]; + + $Element['attributes']['href'] = $Definition['url']; + $Element['attributes']['title'] = $Definition['title']; + } + + $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); + return array( - 'markup' => '>', - 'extent' => 1, + 'extent' => $extent, + 'element' => $Element, ); } - protected function identifyUrlTag($Excerpt) + protected function inlineMarkup($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(https?:[\/]{2}[^\s]+?)>/i', $Excerpt['text'], $matches)) + if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + return; + } + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + { return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], + 'extent' => strlen($matches[0]), + ); + } + + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + { + return array( + 'markup' => $matches[0], 'extent' => strlen($matches[0]), - 'element' => array( - 'name' => 'a', - 'text' => $url, - 'attributes' => array( - 'href' => $url, - ), - ), ); } } - protected function identifyEmailTag($Excerpt) + protected function inlineSpecialCharacter($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\S+?@\S+?)>/', $Excerpt['text'], $matches)) + if ($Excerpt['text'][0] === '&' and ! 
preg_match('/^&#?\w+;/', $Excerpt['text'])) + { + return array( + 'markup' => '&', + 'extent' => 1, + ); + } + + $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); + + if (isset($SpecialCharacter[$Excerpt['text'][0]])) + { + return array( + 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', + 'extent' => 1, + ); + } + } + + protected function inlineStrikethrough($Excerpt) + { + if ( ! isset($Excerpt['text'][1])) + { + return; + } + + if ($Excerpt['text'][1] === '~' and preg_match('/^~~(?=\S)(.+?)(?<=\S)~~/', $Excerpt['text'], $matches)) { return array( 'extent' => strlen($matches[0]), 'element' => array( - 'name' => 'a', + 'name' => 'del', 'text' => $matches[1], - 'attributes' => array( - 'href' => 'mailto:'.$matches[1], - ), + 'handler' => 'line', ), ); } } - protected function identifyTag($Excerpt) + protected function inlineUrl($Excerpt) { - if ($this->markupEscaped) + if ($this->urlsLinked !== true or ! isset($Excerpt['text'][2]) or $Excerpt['text'][2] !== '/') { return; } - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<\/?\w.*?>/s', $Excerpt['text'], $matches)) + if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) { - return array( - 'markup' => $matches[0], - 'extent' => strlen($matches[0]), + $Inline = array( + 'extent' => strlen($matches[0][0]), + 'position' => $matches[0][1], + 'element' => array( + 'name' => 'a', + 'text' => $matches[0][0], + 'attributes' => array( + 'href' => $matches[0][0], + ), + ), ); + + return $Inline; } } - protected function identifyInlineCode($Excerpt) + protected function inlineUrlTag($Excerpt) { - $marker = $Excerpt['text'][0]; - - if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) { - $text = $matches[2]; - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - $text = preg_replace("/[ ]*\n/", ' ', $text); + $url = str_replace(array('&', '<'), 
array('&', '<'), $matches[1]); return array( 'extent' => strlen($matches[0]), 'element' => array( - 'name' => 'code', - 'text' => $text, + 'name' => 'a', + 'text' => $url, + 'attributes' => array( + 'href' => $url, + ), ), ); } } - protected function identifyLink($Excerpt) - { - $extent = $Excerpt['text'][0] === '!' ? 1 : 0; + # ~ - if (strpos($Excerpt['text'], ']') and preg_match('/\[((?:[^][]|(?R))*)\]/', $Excerpt['text'], $matches)) + protected function unmarkedText($text) + { + if ($this->breaksEnabled) { - $Link = array('text' => $matches[1], 'label' => strtolower($matches[1])); - - $extent += strlen($matches[0]); + $text = preg_replace('/[ ]*\n/', "
\n", $text); + } + else + { + $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
\n", $text); + $text = str_replace(" \n", "\n", $text); + } - $substring = substr($Excerpt['text'], $extent); + return $text; + } - if (preg_match('/^\s*\[([^][]+)\]/', $substring, $matches)) - { - $Link['label'] = strtolower($matches[1]); + # + # Handlers + # - if (isset($this->Definitions['Reference'][$Link['label']])) - { - $Link += $this->Definitions['Reference'][$Link['label']]; + protected function element(array $Element) + { + $markup = '<'.$Element['name']; - $extent += strlen($matches[0]); - } - else - { - return; - } - } - elseif (isset($this->Definitions['Reference'][$Link['label']])) + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) { - $Link += $this->Definitions['Reference'][$Link['label']]; - - if (preg_match('/^[ ]*\[\]/', $substring, $matches)) + if ($value === null) { - $extent += strlen($matches[0]); + continue; } + + $markup .= ' '.$name.'="'.$value.'"'; } - elseif (preg_match('/^\([ ]*(.*?)(?:[ ]+[\'"](.+?)[\'"])?[ ]*\)/', $substring, $matches)) - { - $Link['url'] = $matches[1]; + } - if (isset($matches[2])) - { - $Link['title'] = $matches[2]; - } + if (isset($Element['text'])) + { + $markup .= '>'; - $extent += strlen($matches[0]); + if (isset($Element['handler'])) + { + $markup .= $this->{$Element['handler']}($Element['text']); } else { - return; + $markup .= $Element['text']; } - } - else - { - return; - } - $url = str_replace(array('&', '<'), array('&', '<'), $Link['url']); - - if ($Excerpt['text'][0] === '!') - { - $Element = array( - 'name' => 'img', - 'attributes' => array( - 'src' => $url, - 'alt' => $Link['text'], - ), - ); + $markup .= ''; } else { - $Element = array( - 'name' => 'a', - 'handler' => 'line', - 'text' => $Link['text'], - 'attributes' => array( - 'href' => $url, - ), - ); - } - - if (isset($Link['title'])) - { - $Element['attributes']['title'] = $Link['title']; + $markup .= ' />'; } - return array( - 'extent' => $extent, - 'element' => $Element, - ); + return $markup; } - 
protected function identifyEmphasis($Excerpt) + protected function elements(array $Elements) { - if ( ! isset($Excerpt['text'][1])) - { - return; - } - - $marker = $Excerpt['text'][0]; + $markup = ''; - if ($Excerpt['text'][1] === $marker and preg_match($this->StrongRegex[$marker], $Excerpt['text'], $matches)) - { - $emphasis = 'strong'; - } - elseif (preg_match($this->EmRegex[$marker], $Excerpt['text'], $matches)) - { - $emphasis = 'em'; - } - else + foreach ($Elements as $Element) { - return; + $markup .= "\n" . $this->element($Element); } - return array( - 'extent' => strlen($matches[0]), - 'element' => array( - 'name' => $emphasis, - 'handler' => 'line', - 'text' => $matches[1], - ), - ); - } - - # - # ~ - - protected function readPlainText($text) - { - $breakMarker = $this->breaksEnabled ? "\n" : array(" \n", "\\\n"); - - $text = str_replace($breakMarker, "
\n", $text); - $text = str_replace(" \n", "\n", $text); + $markup .= "\n"; - return $text; + return $markup; } - # # ~ - # protected function li($lines) { @@ -1374,7 +1455,18 @@ class Parsedown } # - # Multiton + # Deprecated Methods + # + + function parse($text) + { + $markup = $this->text($text); + + return $markup; + } + + # + # Static Methods # static function instance($name = 'default') @@ -1393,31 +1485,17 @@ class Parsedown private static $instances = array(); - # - # Deprecated Methods - # - - /** - * @deprecated in favor of "text" - */ - function parse($text) - { - $markup = $this->text($text); - - return $markup; - } - # # Fields # - protected $Definitions; + protected $DefinitionData; # - # Read-only + # Read-Only protected $specialCharacters = array( - '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', ); protected $StrongRegex = array( @@ -1430,6 +1508,8 @@ class Parsedown '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', ); + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + protected $voidElements = array( 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', ); diff --git a/lib/php/parsedown/README.md b/lib/php/parsedown/README.md index 1984e065..283a6ed5 100644 --- a/lib/php/parsedown/README.md +++ b/lib/php/parsedown/README.md @@ -1,18 +1,20 @@ ## Parsedown +[![Build Status](https://img.shields.io/travis/erusev/parsedown/master.svg?style=flat-square)](https://travis-ci.org/erusev/parsedown) + + Better Markdown Parser in PHP -[[ demo ]](http://parsedown.org/demo) +[See Demo](http://parsedown.org/demo) ### Features * [Fast](http://parsedown.org/speed) * [Consistent](http://parsedown.org/consistency) * [GitHub flavored](https://help.github.com/articles/github-flavored-markdown) -* [Tested](http://parsedown.org/tests/) in PHP 
5.2, 5.3, 5.4, 5.5, 5.6 and [hhvm](http://www.hhvm.com/) -* Extensible -* [Markdown Extra extension](https://github.com/erusev/parsedown-extra) new -* [JavaScript port](https://github.com/hkdobrev/parsedown.js) under development new +* [Tested](http://parsedown.org/tests/) in 5.3, 5.4, 5.5, 5.6 and [HHVM](http://www.hhvm.com/) +* [Extensible](https://github.com/erusev/parsedown/wiki/Writing-Extensions) +* [Markdown Extra extension](https://github.com/erusev/parsedown-extra) ### Installation @@ -30,14 +32,20 @@ More examples in [the wiki](https://github.com/erusev/parsedown/wiki/Usage) and ### Questions -**How does Parsedown work?**
-Parsedown recognises that the Markdown syntax is optimised for humans so it tries to read like one. It goes through text line by line. It looks at how lines start to identify blocks. It looks for special characters to identify inline elements. +**How does Parsedown work?** + +It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then it perhaps belongs to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). + +We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. + +**Is it compliant with CommonMark?** + +It passes most of the CommonMark tests. Most of the tests that don't pass deal with cases that are quite uncommon. Still, as CommonMark matures, compliance should improve. + +**Who uses it?** -**Why doesn’t Parsedown use namespaces?**
-Using namespaces would mean dropping support for PHP 5.2. We believe that since Parsedown is a single class with an uncommon name, making this trade wouldn't be worth it. +[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [Kirby CMS](http://getkirby.com/), [Grav CMS](http://getgrav.org/), [Statamic CMS](http://www.statamic.com/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references). -**Is Parsedown compliant with CommonMark?**
-We are [working on it](https://github.com/erusev/parsedown/tree/commonmark). +**How can I help?** -**Who uses Parsedown?**
-[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references). +Use it, star it, share it and if you feel generous, [donate some money](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=528P3NZQMP8N2). diff --git a/lib/php/parsedown/UPDATE.sh b/lib/php/parsedown/UPDATE.sh new file mode 100755 index 00000000..79fd3f8e --- /dev/null +++ b/lib/php/parsedown/UPDATE.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +declare -r DIR_NAME="$(cd "$(dirname "${0}")"; echo $(pwd))" +declare -r NAME="$(basename "${DIR_NAME}")" +cd $DIR_NAME || exit +echo $DIR_NAME $NAME +git clone https://github.com/erusev/parsedown +rsync --exclude='.svn' --exclude='.travis.yml' --exclude='test/' -vuc --stats $NAME/ ./ +rm -rf $NAME -- 2.47.3