Skip to content

Instantly share code, notes, and snippets.

@samt
Created May 31, 2011 08:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samt/1000161 to your computer and use it in GitHub Desktop.
Save samt/1000161 to your computer and use it in GitHub Desktop.
<?php
/*
* Markdown Parser
* Version 0.1.0
* By Sam Thompson <http://openflame-project.org/>
*
* This code has hereby been released into the public domain
* There's absolutely no warranty.
*
* This class works as a parser for Markdown, placing the data in an array
* that can be output or further manipulated as needed.
*/
class MarkdownParser
{
    // Location flags (which kind of block is currently being collected)
    const IN_PARAGRAPH = 1;
    const IN_LIST = 2;
    const IN_PREFORMAT = 4;
    const IN_BLOCKQUOTE = 8;

    // Types (values of the 'type' key in $parsedText entries)
    const TYPE_H1 = 'h1';
    const TYPE_H2 = 'h2';
    const TYPE_H3 = 'h3';
    const TYPE_H4 = 'h4';
    const TYPE_H5 = 'h5';
    const TYPE_H6 = 'h6';
    const TYPE_UL = 'ul';
    const TYPE_OL = 'ol';
    const TYPE_P = 'p';
    const TYPE_BQ = 'blockquote';
    const TYPE_PRE = 'pre';
    const TYPE_CODE = 'code';

    /*
     * @var string The raw Markdown text handed to the constructor
     */
    public $raw = '';

    /*
     * @var array Block-level elements in document order. Each entry is one of:
     *   headings, paragraphs, blockquotes: array('type' => ..., 'text' => string)
     *   lists:                             array('type' => 'ul'|'ol', 'items' => array of strings)
     *   fenced code:                       array('type' => 'pre'|'code', 'lang' => string, 'text' => string)
     */
    public $parsedText = array();

    /*
     * @var array Reference-style link targets ("[1]: http://...") keyed by
     * their integer id
     */
    public $linkList = array();

    /*
     * Constructor
     * @param mixed $raw - Markdown source, cast to string
     */
    public function __construct($raw)
    {
        $this->raw = (string) $raw;
    }

    /*
     * Parse $raw into $parsedText and $linkList.
     *
     * Only block-level structure is recognised; inline markup is left as-is
     * in the text. Both result properties are rebuilt from scratch on every
     * call, so calling parse() more than once does not duplicate entries.
     *
     * @return void
     */
    public function parse()
    {
        $this->parsedText = array();
        $this->linkList = array();

        // Get rid of some annoyances then create an array
        $raw = str_replace(array("\r", "\t"), array('', ' '), $this->raw);
        $lines = explode("\n", $raw);
        $total = count($lines);
        $state = $this->emptyState();

        for ($k = 0; $k < $total; $k++)
        {
            $line = $lines[$k];
            $isFence = (substr($line, 0, 3) === '```');

            // Code/preformatted is always first: inside a fence nothing else
            // is interpreted, the lines are collected verbatim.
            if ($state['flag'] === self::IN_PREFORMAT)
            {
                if ($isFence)
                {
                    // Closing fence: emit the block and start clean
                    $state = $this->flushBlock($state);
                }
                else
                {
                    $state['items'][] = $line;
                }
                continue;
            }

            if ($isFence)
            {
                // Opening fence: finish whatever was pending first so that
                // its text is not overwritten or mixed into the code block.
                $state = $this->flushBlock($state);
                $lang = trim(substr(trim($line), 3));
                $state['flag'] = self::IN_PREFORMAT;
                $state['lang'] = $lang;
                $state['buffer'] = ($lang === '') ? self::TYPE_PRE : self::TYPE_CODE;
                continue;
            }

            // A blank line ends the current block. Handling it here also
            // guarantees that every check below sees a non-empty line, so
            // reading $line[0] is safe.
            if (trim($line) === '')
            {
                $state = $this->flushBlock($state);
                continue;
            }

            // Two line header: this line is the title when the next line is
            // an underline of '=' or '-' with exactly the same length. Looking
            // ahead (rather than back) keeps the title from also being
            // collected as paragraph text.
            $next = ($k + 1 < $total) ? $lines[$k + 1] : '';
            if (strlen($next) === strlen($line) && $this->isHeader($next))
            {
                $state = $this->flushBlock($state);
                $level = ($next[0] === '=') ? 1 : 2;
                $this->parsedText[] = array('type' => $this->getHeading($level), 'text' => trim($line));
                $k++; // the underline has been consumed
                continue;
            }

            // Is it an in-line header?
            if ($line[0] === '#')
            {
                $state = $this->flushBlock($state);
                $level = min(strspn($line, '#'), 6);
                // Strip only the leading marker and an optional closing run
                // of hashes; a '#' inside the title text is preserved.
                $text = preg_replace('/\s+#+\s*$/', '', substr($line, $level));
                $this->parsedText[] = array('type' => $this->getHeading($level), 'text' => trim($text));
                continue;
            }

            // Lists: "*" or "-" bullets, or a number written as "1.", "1)"
            // or "(1)", followed by whitespace and the item text.
            if (preg_match('/^\s*(\(?[0-9]+[.)]|[*-])\s+(\S.*)$/', $line, $matches))
            {
                if ($state['flag'] !== self::IN_LIST)
                {
                    $state = $this->flushBlock($state);
                    $state['flag'] = self::IN_LIST;
                }
                $ordered = ($matches[1] !== '*' && $matches[1] !== '-');
                // As before, the most recent marker decides the list type
                $state['buffer'] = $ordered ? self::TYPE_OL : self::TYPE_UL;
                $state['items'][] = rtrim($matches[2]);
                continue;
            }

            // Blockquote
            if ($line[0] === '>')
            {
                if ($state['flag'] !== self::IN_BLOCKQUOTE)
                {
                    $state = $this->flushBlock($state);
                    $state['flag'] = self::IN_BLOCKQUOTE;
                }
                $state['buffer'] .= trim(substr($line, 1)) . ' ';
                continue;
            }

            // Linklists: recorded on the side, they do not interrupt the
            // block that is currently being collected.
            if (preg_match('#^\[([0-9]+)\]: (.*)$#', $line, $matches) === 1)
            {
                $this->linkList[(int) $matches[1]] = trim($matches[2]);
                continue;
            }

            // Normal paragraph: anything that is not one of the above
            if ($state['flag'] !== self::IN_PARAGRAPH)
            {
                $state = $this->flushBlock($state);
                $state['flag'] = self::IN_PARAGRAPH;
            }
            $state['buffer'] .= trim($line) . ' ';
        }

        // The document may end without a trailing blank line or with an
        // unclosed fence; emit whatever is still pending.
        $this->flushBlock($state);
    }

    /*
     * Checks whether a line is a header underline, i.e. a non-empty line
     * made up entirely of '=' or entirely of '-'.
     */
    private function isHeader($ln)
    {
        if ($ln === '')
        {
            return false;
        }
        $length = strlen($ln);
        return strspn($ln, '=') === $length || strspn($ln, '-') === $length;
    }

    /*
     * Maps a heading level to its type constant.
     * @param int $level - 1 through 6
     * @return string|null - 'h1'..'h6', or null for any other level
     */
    public function getHeading($level)
    {
        switch ($level)
        {
            case 1: return self::TYPE_H1;
            case 2: return self::TYPE_H2;
            case 3: return self::TYPE_H3;
            case 4: return self::TYPE_H4;
            case 5: return self::TYPE_H5;
            case 6: return self::TYPE_H6;
            default: return null;
        }
    }

    /*
     * Returns the parser state used when no block is being collected.
     * 'buffer' holds accumulated text (paragraph, blockquote) or the block
     * type (list, fenced code); 'items' holds list items or code lines.
     */
    private function emptyState()
    {
        return array('flag' => 0, 'buffer' => '', 'lang' => '', 'items' => array());
    }

    /*
     * Appends the block described by $state (if any) to $parsedText and
     * returns a fresh empty state for the caller to continue with.
     */
    private function flushBlock(array $state)
    {
        switch ($state['flag'])
        {
            case self::IN_PARAGRAPH:
                // rtrim drops the separator added after the last line
                $this->parsedText[] = array('type' => self::TYPE_P, 'text' => rtrim($state['buffer']));
                break;
            case self::IN_BLOCKQUOTE:
                $this->parsedText[] = array('type' => self::TYPE_BQ, 'text' => rtrim($state['buffer']));
                break;
            case self::IN_LIST:
                $this->parsedText[] = array('type' => $state['buffer'], 'items' => $state['items']);
                break;
            case self::IN_PREFORMAT:
                $this->parsedText[] = array(
                    'type' => $state['buffer'],
                    'lang' => $state['lang'],
                    'text' => implode("\n", $state['items']),
                );
                break;
        }
        return $this->emptyState();
    }
}

Heading 1

This is a test of stuff and other junk.

Block Quote for such a thing that would be a nice quote More of the blockquote

Google Is a website, and this is an inline style

Heading 2

  • This is a list
  • With items
  • Item again
  • yay items

Hello!

Below is a basic code example.

<?php
echo "Hello World!";
?>

You can see more crap in the OpenFlame Documentation

<?php
/*
* Demo driver: parses test.md with MarkdownParser and prints the raw text,
* the parsed blocks and the collected link list inside a <pre> element.
*/
include 'MarkdownParser.php';

$source = file_get_contents('test.md');
if ($source === false)
{
    // Without this check a failed read would be cast to an empty string
    // and silently parsed as an empty document.
    exit("Unable to read test.md\n");
}

$text = new MarkdownParser($source);
$separator = "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n";
$escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;

echo "<pre>";
// The sample document contains markup of its own; escape it so the browser
// shows it verbatim instead of interpreting it.
echo htmlspecialchars($text->raw, $escapeFlags, 'UTF-8');
echo $separator;

$text->parse();

// var_dump() prints directly, so capture its output in order to escape it
ob_start();
var_dump($text->parsedText);
echo htmlspecialchars(ob_get_clean(), $escapeFlags, 'UTF-8');
echo $separator;

ob_start();
var_dump($text->linkList);
echo htmlspecialchars(ob_get_clean(), $escapeFlags, 'UTF-8');
echo "</pre>";
@samt
Copy link
Author

samt commented May 31, 2011

Just a note: this does not handle inline elements yet; I'm still working on the proper way to do that.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment