Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
require 'digest/md5'
# Pre-processes +text+ with GitHub-Flavored-Markdown tweaks before it is
# handed to a Markdown renderer:
#
# 1. <pre>...</pre> blocks are swapped for placeholders so the later steps
#    cannot touch their contents.
# 2. Words at the start of a line that contain two or more underscores get
#    every underscore backslash-escaped (so foo_bar_baz is not italicised).
# 3. A single newline after a line that starts with a word character or "<"
#    is turned into a trailing-space line break; blank-line paragraph breaks
#    are left alone.
# 4. The <pre> blocks are put back, each preceded by a blank line.
#
# text - the String to process. It is modified in place unless it is frozen,
#        in which case a copy is processed instead.
#
# Returns the processed String (the same object as +text+ when not frozen).
def gfm(text)
  # gsub! edits the receiver; a frozen string cannot be edited, so fall back
  # to a private copy instead of raising.
  text = text.dup if text.frozen?

  # Extract pre blocks
  extractions = {}
  text.gsub!(%r{<pre>.*?</pre>}m) do |match|
    md5 = Digest::MD5.hexdigest(match)
    extractions[md5] = match
    "{gfm-extraction-#{md5}}"
  end

  # prevent foo_bar_baz from ending up with an italic word in the middle
  text.gsub!(/(^(?! {4}|\t)\w+_\w+_\w[\w_]*)/) do |word|
    # Count the underscores directly. The block must always return a String:
    # a nil result would make gsub! replace the whole word with "".
    word.count('_') >= 2 ? word.gsub('_', '\_') : word
  end

  # in very clear cases, let newlines become <br /> tags
  # NOTE(review): Markdown hard line breaks normally need two trailing
  # spaces; this literal has one. The tests in this file expect the current
  # output, so confirm against the upstream source before changing it.
  text.gsub!(/^[\w\<][^\n]*\n+/) do |line|
    line =~ /\n{2}/ ? line : (line.strip!; line << " \n")
  end

  # Insert pre block extractions
  text.gsub!(/\{gfm-extraction-([0-9a-f]{32})\}/) do |placeholder|
    # A placeholder-shaped token that was already in the input has no stored
    # block; leave it untouched rather than concatenating nil.
    extractions.key?($1) ? "\n\n" + extractions[$1] : placeholder
  end

  text
end
# Self-test suite: only runs when this file is executed directly rather than
# loaded by another program. Uses Test::Unit with shoulda's context/should
# blocks; every case feeds a literal string to gfm and checks the result.
if __FILE__ == $PROGRAM_NAME
  require 'test/unit'
  require 'shoulda'

  class GFMTest < Test::Unit::TestCase
    context "GFM" do
      should "not touch single underscores inside words" do
        result = gfm("foo_bar")
        assert_equal "foo_bar", result
      end

      should "not touch underscores in code blocks" do
        result = gfm(" foo_bar_baz")
        assert_equal " foo_bar_baz", result
      end

      should "not touch underscores in pre blocks" do
        result = gfm("<pre>\nfoo_bar_baz\n</pre>")
        assert_equal "\n\n<pre>\nfoo_bar_baz\n</pre>", result
      end

      should "not treat pre blocks with pre-text differently" do
        # One input already has the blank-line prefix, the other has text
        # directly in front of the block; the leading characters are dropped
        # before the two results are compared.
        blank_prefixed = "\n\n<pre>\nthis is `a\\_test` and this\\_too\n</pre>"
        text_prefixed = "hmm<pre>\nthis is `a\\_test` and this\\_too\n</pre>"
        from_blank = gfm(blank_prefixed)[2..-1]
        from_text = gfm(text_prefixed)[3..-1]
        assert_equal from_blank, from_text
      end

      should "escape two or more underscores inside words" do
        result = gfm("foo_bar_baz")
        assert_equal "foo\\_bar\\_baz", result
      end

      should "turn newlines into br tags in simple cases" do
        result = gfm("foo\nbar")
        assert_equal "foo \nbar", result
      end

      should "convert newlines in all groups" do
        result = gfm("apple\npear\norange\n\nruby\npython\nerlang")
        assert_equal "apple \npear \norange\n\nruby \npython \nerlang", result
      end

      should "convert newlines in even long groups" do
        result = gfm("apple\npear\norange\nbanana\n\nruby\npython\nerlang")
        assert_equal "apple \npear \norange \nbanana\n\nruby \npython \nerlang", result
      end

      should "not convert newlines in lists" do
        hash_list = gfm("# foo\n# bar")
        assert_equal "# foo\n# bar", hash_list
        star_list = gfm("* foo\n* bar")
        assert_equal "* foo\n* bar", star_list
      end
    end
  end
end
@keithpitt

This comment has been minimized.

Copy link

commented Mar 31, 2010

Thanks for making this code available - I was pulling my hair out trying to figure out how to accomplish something like this on my site.

@ryansobol

This comment has been minimized.

Copy link

commented Aug 29, 2010

Thanks for sharing! I found one Ruby 1.9 compatibility issue that's worth mentioning. Array#to_s is equivalent to Array#inspect now.
http://eigenclass.org/hiki/Changes+in+Ruby+1.9#l83

The fix is simple -- just change the to_s on line 14 to join and this fine code snippet will run for both Ruby 1.8 and 1.9.

@dave1010

This comment has been minimized.

Copy link

commented Jan 20, 2011

The code [\#1](http://github.com) without the backslash breaks GH's markdown parser. This generates [#1](http://github.com).

Edit: it looks ok now it's posted here. Maybe it's just in the issue tracker / JS previews; try it in the preview box below.

@viking

This comment has been minimized.

Copy link

commented Jan 28, 2011

If you have a pre-formatted code block that contains the string '#1', it gets expanded to a link.

@sunaku

This comment has been minimized.

Copy link

commented Feb 3, 2011

I agree with @viking. Here is an example of the problem:
https://github.com/tenderlove/nokogiri/issues/#issue/405/comment/725920

@lsauer

This comment has been minimized.

Copy link

commented Sep 20, 2011

Why is there a '?' in here: < pre >.*?< / pre >? Does the Ruby regex engine have some weird bug I don't know about?
Normally .+? == .*

@sunaku

This comment has been minimized.

Copy link

commented Sep 20, 2011

The ? prevents .* from matching </pre>. And .+? is not the same as .*. See reluctant matching.

@sineld

This comment has been minimized.

Copy link

commented Feb 23, 2012

Thanks.

@fordnox

This comment has been minimized.

Copy link

commented Mar 28, 2012

is there a port of this to PHP?

@eungjun-yi

This comment has been minimized.

Copy link

commented Apr 10, 2012

Coffeescript port is here: https://gist.github.com/2349475

@koenpunt

This comment has been minimized.

Copy link

commented Jul 28, 2012

@vickychijwani

This comment has been minimized.

Copy link

commented Dec 25, 2012

There's an error in the line break handling code. Take for example this input:

**foo**
bar
baz

On GitHub, this is rendered as expected, because GitHub actually uses redcarpet for rendering GFM on the site:

foo
bar
baz

But using the code here in gfm.rb, it renders as:

foo bar
baz

This is happening because the regex used in gfm.rb is incorrect; it does not add a line break when the line begins with a * (emphasized / strong text) or a > (blockquote). It can be rectified as follows:

--- gfm.rb
+++ gfm.2.rb
@@ -15,6 +15,6 @@
   end

   # in very clear cases, let newlines become <br /> tags
-  text.gsub!(/^[\w\<][^\n]*\n+/) do |x|
+  text.gsub!(/^[\w\<\>\*][^\n]*\n+/) do |x|
     x =~ /\n{2}/ ? x : (x.strip!; x << "  \n")
   end
@Triturus

This comment has been minimized.

Copy link

commented Oct 25, 2013

Very good Work

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.