From 7609e1a6246b8ce5796afe4796bd949a3188ce7f Mon Sep 17 00:00:00 2001 From: Mike West Date: Sun, 21 Nov 2010 16:36:14 +0100 Subject: [PATCH 1/6] Adding Ruby block comment syntax. Working towards GH-22. --- lib/rocco.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index 0b2cbb6..3c77ece 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -202,14 +202,14 @@ class Rocco language = @options[:language] comment_styles = { "bash" => { :single => "#", :multi => nil }, - "c" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, - "coffee-script" => { :single => "#", :multi => { :start => "###", :middle => nil, :end => "###" } }, - "cpp" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, - "java" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, - "js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "c" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "coffee-script" => { :single => "#", :multi => { :start => "###", :middle => nil, :end => "###" } }, + "cpp" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "java" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, "lua" => { :single => "--", :multi => nil }, - "python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } }, - "rb" => { :single => "#", :multi => nil }, + "python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } }, + "rb" => { :single => "#", :multi => { :start => '=begin', :middle => nil, :end => '=end' } }, "scheme" => { :single => ";;", :multi => nil }, } From d067210faaac21718d74d22c3e65c7e8765f9173 Mon Sep 17 00:00:00 2001 From: Mike West Date: Sun, 
21 Nov 2010 16:53:22 +0100 Subject: [PATCH 2/6] Refactoring comment_char internals: prepping for block comments --- lib/rocco.rb | 18 ++++++++++++------ test/test_commentchar_detection.rb | 10 +++++----- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index 3c77ece..664169d 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -100,19 +100,25 @@ class Rocco if detect_language() != "text" # then assign the detected language to `:language`, and look for # comment characters based on that language - @options[:language] = detect_language() + @options[:language] = detect_language() @options[:comment_chars] = generate_comment_chars() # If we didn't detect a language, but the user provided one, use it # to look around for comment characters to override the default. elsif @options[:language] != defaults[:language] @options[:comment_chars] = generate_comment_chars() + + # If neither is true, then convert the default comment character string + # into the comment_char syntax (we'll discuss that syntax in detail when + # we get to `generate_comment_chars()` in a moment. + else + @options[:comment_chars] = { :single => @options[:comment_chars], :multi => "" } end # Turn `:comment_chars` into a regex matching a series of spaces, the # `:comment_chars` string, and the an optional space. We'll use that # to detect single-line comments. - @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?") + @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars][:single]}\s?") # `parse()` the file contents stored in `@data`. Run the result through `split()` # and that result through `highlight()` to generate the final section list. 
@@ -214,9 +220,9 @@ class Rocco } if comment_styles[language] - comment_styles[language][:single] + comment_styles[language] else - @options[:comment_chars] + { :single => @options[:comment_chars], :multi => nil } end end end @@ -281,7 +287,7 @@ class Rocco # Combine all code blocks into a single big stream and run through either # `pygmentize(1)` or - code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n") + code_stream = code_blocks.join("\n\n#{@options[:comment_chars][:single]} DIVIDER\n\n") if pygmentize? code_html = highlight_pygmentize(code_stream) @@ -292,7 +298,7 @@ class Rocco # Do some post-processing on the pygments output to split things back # into sections and remove partial `
` blocks.
     code_html = code_html.
-      split(/\n*#{@options[:comment_chars]} DIVIDER<\/span>\n*/m).
+      split(/\n*#{@options[:comment_chars][:single]} DIVIDER<\/span>\n*/m).
       map { |code| code.sub(/\n?
/m, '') }.
       map { |code| code.sub(/\n?<\/pre><\/div>\n/m, '') }
 
diff --git a/test/test_commentchar_detection.rb b/test/test_commentchar_detection.rb
index 1c87196..efb4ee2 100644
--- a/test/test_commentchar_detection.rb
+++ b/test/test_commentchar_detection.rb
@@ -3,22 +3,22 @@ require File.dirname(__FILE__) + '/helper'
 class RoccoAutomaticCommentChars < Test::Unit::TestCase
     def test_basic_detection
         r = Rocco.new( 'filename.js' ) { "" }
-        assert_equal "//", r.options[:comment_chars]
+        assert_equal "//", r.options[:comment_chars][:single]
     end
     def test_fallback_language
         r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "js" } ) { "" }
-        assert_equal "//", r.options[:comment_chars]
+        assert_equal "//", r.options[:comment_chars][:single]
     end
     def test_fallback_default
         r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" }
-        assert_equal "#", r.options[:comment_chars], "`:comment_chars` should be `#` when falling back to defaults."
+        assert_equal "#", r.options[:comment_chars][:single], "`:comment_chars` should be `#` when falling back to defaults."
     end
     def test_fallback_user
         r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :comment_chars => "user" } ) { "" }
-        assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
+        assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
     end
     def test_fallback_user_with_unknown_language
         r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "not-a-language", :comment_chars => "user" } ) { "" }
-        assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
+        assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
     end
 end

From f177a9d7e27d562b782147032afea0b3e3703538 Mon Sep 17 00:00:00 2001
From: Mike West 
Date: Mon, 22 Nov 2010 08:25:40 +0100
Subject: [PATCH 3/6] Block comment parsing: basics.

Block comments are parsed out, but the commentchar removal isn't working
yet.  I'll refactor that code out of its current home, and move it into
`parse`, as I need to know what _kind_ of comment it is that I'm
stripping.  Carrying that metadata around doesn't make any sense, so
I'll just convert the comment on the fly into a set of non-comment
strings.
---
 lib/rocco.rb                | 50 +++++++++++++++++++++++++++++++------
 test/test_block_comments.rb | 36 ++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 8 deletions(-)
 create mode 100644 test/test_block_comments.rb

diff --git a/lib/rocco.rb b/lib/rocco.rb
index 664169d..880b139 100644
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@@ -112,7 +112,7 @@ class Rocco
     # into the comment_char syntax (we'll discuss that syntax in detail when
     # we get to `generate_comment_chars()` in a moment.
     else
-      @options[:comment_chars]    = { :single => @options[:comment_chars], :multi => "" }
+      @options[:comment_chars]    = { :single => @options[:comment_chars], :multi => nil }
     end
 
     # Turn `:comment_chars` into a regex matching a series of spaces, the 
@@ -241,16 +241,50 @@ class Rocco
     lines = data.split("\n")
     lines.shift if lines[0] =~ /^\#\!/
     lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
+
+    # To detect both block comments and single-line comments, we'll set
+    # up a tiny state machine, and loop through each line of the file.
+    # This requires an `in_comment_block` boolean, and a few regular 
+    # expressions for line tests.
+    in_comment_block    = false
+    single_line_comment, block_comment_start, block_comment_end = nil, nil, nil
+    if not @options[:comment_chars][:single].nil?
+      single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\s?")
+    end
+    if not @options[:comment_chars][:multi].nil?
+      require 'pp'
+      pp @options[:comment_chars]
+      block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\s*$")
+      block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\s*$")
+    end
     lines.each do |line|
-      case line
-      when @comment_pattern
-        if code.any?
-          sections << [docs, code]
-          docs, code = [], []
+      # If we're currently in a comment block, check whether the line matches
+      # the _end_ of a comment block.
+      if in_comment_block
+        if block_comment_end && line.match( block_comment_end )
+          in_comment_block = false
+        else
+          docs << line
         end
-        docs << line
+      # Otherwise, check whether the line matches the beginning of a block, or
+      # a single-line comment all on it's lonesome.  In either case, if there's
+      # code, start a new section
       else
-        code << line
+        if block_comment_start && line.match( block_comment_start )
+          in_comment_block = true
+          if code.any?
+            sections << [docs, code]
+            docs, code = [], []
+          end
+        elsif single_line_comment && line.match( single_line_comment )
+          if code.any?
+            sections << [docs, code]
+            docs, code = [], []
+          end
+          docs << line
+        else
+          code << line
+        end
       end
     end
     sections << [docs, code] if docs.any? || code.any?
diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb
new file mode 100644
index 0000000..48e968f
--- /dev/null
+++ b/test/test_block_comments.rb
@@ -0,0 +1,36 @@
+require File.dirname(__FILE__) + '/helper'
+
+class RoccoBlockCommentTest < Test::Unit::TestCase
+    def test_basics
+        r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ]
+            ],
+            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n" )
+        )
+        assert_equal(
+            [
+                [ [ " * Comment 1a", " * Comment 1b" ], [ "def codeblock", "end" ] ]
+            ],
+            r.parse( "/**\n * Comment 1a\n * Comment 1b\n*/\ndef codeblock\nend\n" )
+        )
+    end
+    def test_multiple_blocks
+        r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ " * Comment 2" ], [] ]
+            ],
+            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\n" )
+        )
+        assert_equal(
+            [
+                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ " * Comment 2" ], [ "if false", "end" ] ]
+            ],
+            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\nif false\nend" )
+        )
+    end
+end

From 77dff765b6b226756b53ae9202a308dcfc63cf9f Mon Sep 17 00:00:00 2001
From: Mike West 
Date: Mon, 22 Nov 2010 08:41:54 +0100
Subject: [PATCH 4/6] Fixing tests for block comments.

---
 lib/rocco.rb                 | 19 ++++++++++---------
 test/test_basics.rb          | 12 ++++++------
 test/test_block_comments.rb  | 12 ++++++------
 test/test_skippable_lines.rb | 21 +++++++++++----------
 4 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/lib/rocco.rb b/lib/rocco.rb
index 880b139..5fc31e8 100644
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@@ -247,15 +247,16 @@ class Rocco
     # This requires an `in_comment_block` boolean, and a few regular 
     # expressions for line tests.
     in_comment_block    = false
-    single_line_comment, block_comment_start, block_comment_end = nil, nil, nil
+    single_line_comment, block_comment_start, block_comment_mid, block_comment_end = nil, nil, nil, nil
     if not @options[:comment_chars][:single].nil?
-      single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\s?")
+      single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?")
     end
     if not @options[:comment_chars][:multi].nil?
-      require 'pp'
-      pp @options[:comment_chars]
-      block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\s*$")
-      block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\s*$")
+      block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$")
+      block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
+      if @options[:comment_chars][:multi][:middle]
+        block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?")
+      end
     end
     lines.each do |line|
       # If we're currently in a comment block, check whether the line matches
@@ -264,7 +265,7 @@ class Rocco
         if block_comment_end && line.match( block_comment_end )
           in_comment_block = false
         else
-          docs << line
+          docs << line.sub( block_comment_mid, '' )
         end
       # Otherwise, check whether the line matches the beginning of a block, or
       # a single-line comment all on it's lonesome.  In either case, if there's
@@ -281,7 +282,7 @@ class Rocco
             sections << [docs, code]
             docs, code = [], []
           end
-          docs << line
+          docs << line.sub( single_line_comment, '' )
         else
           code << line
         end
@@ -297,7 +298,7 @@ class Rocco
   def split(sections)
     docs_blocks, code_blocks = [], []
     sections.each do |docs,code|
-      docs_blocks << docs.map { |line| line.sub(@comment_pattern, '') }.join("\n")
+      docs_blocks << docs.join("\n")
       code_blocks << code.map do |line|
         tabs = line.match(/^(\t+)/)
         tabs ? line.sub(/^\t+/, '  ' * tabs.captures[0].length) : line
diff --git a/test/test_basics.rb b/test/test_basics.rb
index 18a1777..ad315ab 100644
--- a/test/test_basics.rb
+++ b/test/test_basics.rb
@@ -27,14 +27,14 @@ class RoccoBasicTests < Test::Unit::TestCase
         r = Rocco.new( 'test' ) { "" } # Generate throwaway instance so I can test `parse`
         assert_equal(
             [
-                [ [ "# Comment 1" ], [ "def codeblock", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ]
             ],
             r.parse( "# Comment 1\ndef codeblock\nend\n" )
         )
         assert_equal(
             [
-                [ [ "# Comment 1" ], [ "def codeblock" ] ],
-                [ [ "# Comment 2" ], [ "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock" ] ],
+                [ [ "Comment 2" ], [ "end" ] ]
             ],
             r.parse( "# Comment 1\ndef codeblock\n# Comment 2\nend\n" )
         )
@@ -47,7 +47,7 @@ class RoccoBasicTests < Test::Unit::TestCase
                 [ "Comment 1" ],
                 [ "def codeblock\nend" ]
             ],
-            r.split([ [ [ "# Comment 1" ], [ "def codeblock", "end" ] ] ])
+            r.split([ [ [ "Comment 1" ], [ "def codeblock", "end" ] ] ])
         )
         assert_equal(
             [
@@ -55,8 +55,8 @@ class RoccoBasicTests < Test::Unit::TestCase
                 [ "def codeblock", "end" ]
             ],
             r.split( [
-                [ [ "# Comment 1" ], [ "def codeblock" ] ],
-                [ [ "# Comment 2" ], [ "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock" ] ],
+                [ [ "Comment 2" ], [ "end" ] ]
             ] )
         )
     end
diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb
index 48e968f..14b95b9 100644
--- a/test/test_block_comments.rb
+++ b/test/test_block_comments.rb
@@ -5,13 +5,13 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
         r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
         assert_equal(
             [
-                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ]
             ],
             r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n" )
         )
         assert_equal(
             [
-                [ [ " * Comment 1a", " * Comment 1b" ], [ "def codeblock", "end" ] ]
+                [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ]
             ],
             r.parse( "/**\n * Comment 1a\n * Comment 1b\n*/\ndef codeblock\nend\n" )
         )
@@ -20,15 +20,15 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
         r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
         assert_equal(
             [
-                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ " * Comment 2" ], [] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [] ]
             ],
             r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\n" )
         )
         assert_equal(
             [
-                [ [ " * Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ " * Comment 2" ], [ "if false", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [ "if false", "end" ] ]
             ],
             r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\nif false\nend" )
         )
diff --git a/test/test_skippable_lines.rb b/test/test_skippable_lines.rb
index 1baaf10..2672afe 100644
--- a/test/test_skippable_lines.rb
+++ b/test/test_skippable_lines.rb
@@ -5,8 +5,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
     r = Rocco.new( 'filename.sh' ) { "" }
     assert_equal(
         [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 2" ], [ "end" ] ]
         ],
         r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
         "Shebang should be stripped when it appears as the first line."
@@ -16,8 +16,9 @@ class RoccoSkippableLines < Test::Unit::TestCase
     r = Rocco.new( 'filename.sh' ) { "" }
     assert_equal(
         [
-            [ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            # @TODO: `#!/` shouldn't be recognized as a comment.
+            [ [ "Comment 1", "!/usr/bin/env bash" ], [ "def codeblock" ] ],
+            [ [ "Comment 2" ], [ "end" ] ]
         ],
         r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ),
         "Shebang shouldn't be stripped anywhere other than as the first line."
@@ -27,8 +28,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
     r = Rocco.new( 'filename.rb' ) { "" }
     assert_equal(
         [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 2" ], [ "end" ] ]
         ],
         r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
         "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
@@ -38,8 +39,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
     r = Rocco.new( 'filename.py' ) { "" }
     assert_equal(
         [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 2" ], [ "end" ] ]
         ],
         r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
         "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
@@ -49,8 +50,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
     r = Rocco.new( 'filename.sh' ) { "" }
     assert_equal(
         [
-            [ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "encoding: utf-8", "Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 2" ], [ "end" ] ]
         ],
         r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
         "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."

From d0211ecc99a2a9490fd902d16c2de0412417d6fb Mon Sep 17 00:00:00 2001
From: Mike West 
Date: Mon, 22 Nov 2010 13:38:03 +0100
Subject: [PATCH 5/6] Python block comments (no middle character), and CSS
 syntax

---
 lib/rocco.rb                |  5 +++--
 test/test_block_comments.rb | 35 +++++++++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/lib/rocco.rb b/lib/rocco.rb
index 5fc31e8..87b516b 100644
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@@ -211,6 +211,7 @@ class Rocco
         "c"             =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
         "coffee-script" =>  { :single => "#",   :multi => { :start => "###",    :middle => nil, :end => "###" } },
         "cpp"           =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
+        "css"           =>  { :single => nil,   :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
         "java"          =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
         "js"            =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
         "lua"           =>  { :single => "--",  :multi => nil },
@@ -265,7 +266,7 @@ class Rocco
         if block_comment_end && line.match( block_comment_end )
           in_comment_block = false
         else
-          docs << line.sub( block_comment_mid, '' )
+          docs << line.sub( block_comment_mid || '', '' )
         end
       # Otherwise, check whether the line matches the beginning of a block, or
       # a single-line comment all on it's lonesome.  In either case, if there's
@@ -282,7 +283,7 @@ class Rocco
             sections << [docs, code]
             docs, code = [], []
           end
-          docs << line.sub( single_line_comment, '' )
+          docs << line.sub( single_line_comment || '', '' )
         else
           code << line
         end
diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb
index 14b95b9..a61fceb 100644
--- a/test/test_block_comments.rb
+++ b/test/test_block_comments.rb
@@ -7,13 +7,13 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
             [
                 [ [ "Comment 1" ], [ "def codeblock", "end" ] ]
             ],
-            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n" )
+            r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n" )
         )
         assert_equal(
             [
                 [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ]
             ],
-            r.parse( "/**\n * Comment 1a\n * Comment 1b\n*/\ndef codeblock\nend\n" )
+            r.parse( "/**\n * Comment 1a\n * Comment 1b\n */\ndef codeblock\nend\n" )
         )
     end
     def test_multiple_blocks
@@ -23,14 +23,41 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
                 [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
                 [ [ "Comment 2" ], [] ]
             ],
-            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\n" )
+            r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\n" )
         )
         assert_equal(
             [
                 [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
                 [ [ "Comment 2" ], [ "if false", "end" ] ]
             ],
-            r.parse( "/**\n * Comment 1\n*/\ndef codeblock\nend\n/**\n * Comment 2\n*/\nif false\nend" )
+            r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend" )
+        )
+    end
+    def test_block_without_middle_character
+        r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "  Comment 2" ], [] ]
+            ],
+            r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\n" )
+        )
+        assert_equal(
+            [
+                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "  Comment 2" ], [ "if false", "end" ] ]
+            ],
+            r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\nif false\nend" )
+        )
+    end 
+    def test_language_without_single_line_comments
+        r = Rocco.new( 'test', '', { :language => "css" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [ "if false", "end" ] ]
+            ],
+            r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend" )
         )
     end
 end

From b11543d382249b8eb50b250ed2fa8840b8683b60 Mon Sep 17 00:00:00 2001
From: Mike West 
Date: Mon, 22 Nov 2010 14:38:01 +0100
Subject: [PATCH 6/6] Normalizing leading space in comments

That is:

    def function():
        """
            This is a comment
            with _lots_ of leading
            space!  OMG!
        """
        pass

Will parse into:

    [
        [
            [   "This is a comment",
                "with _lots_ of leading",
                "space!  OMG!"
            ],
            ...
        ]
    ]
---
 lib/rocco.rb                       | 35 ++++++++++++++++++++++++++----
 test/test_block_comments.rb        |  8 +++----
 test/test_comment_normalization.rb | 24 ++++++++++++++++++++
 3 files changed, 59 insertions(+), 8 deletions(-)
 create mode 100644 test/test_comment_normalization.rb

diff --git a/lib/rocco.rb b/lib/rocco.rb
index 87b516b..f27e7bc 100644
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@@ -233,13 +233,15 @@ class Rocco
 
   # Parse the raw file data into a list of two-tuples. Each tuple has the
   # form `[docs, code]` where both elements are arrays containing the
-  # raw lines parsed from the input file. The first line is ignored if it
-  # is a shebang line.  We also ignore the PEP 263 encoding information in
-  # python sourcefiles, and the similar ruby 1.9 syntax.
+  # raw lines parsed from the input file, comment characters stripped.
   def parse(data)
     sections = []
     docs, code = [], []
     lines = data.split("\n")
+
+    # The first line is ignored if it is a shebang line.  We also ignore the
+    # PEP 263 encoding information in python sourcefiles, and the similar ruby
+    # 1.9 syntax.
     lines.shift if lines[0] =~ /^\#\!/
     lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
 
@@ -290,7 +292,32 @@ class Rocco
       end
     end
     sections << [docs, code] if docs.any? || code.any?
-    sections
+    normalize_leading_spaces( sections )
+  end
+
+  # Normalizes documentation whitespace by checking for leading whitespace,
+  # removing it, and then removing the same amount of whitespace from each
+  # succeeding line.  That is:
+  #
+  #     def func():
+  #       """
+  #         Comment 1
+  #         Comment 2
+  #       """
+  #       print "omg!"
+  #
+  # should yield a comment block of `Comment 1\nComment 2` and code of
+  # `def func():\n  print "omg!"`
+  def normalize_leading_spaces( sections )
+    sections.map do |section|
+      if section[ 0 ]
+        leading_space = section[0][0].match( "^\s+" )
+        if leading_space
+          section[0] = section[0].map{ |line| line.sub( /^#{leading_space.to_s}/, '' ) }
+        end
+      end
+      section
+    end
   end
 
   # Take the list of paired *sections* two-tuples and split into two
diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb
index a61fceb..f814a49 100644
--- a/test/test_block_comments.rb
+++ b/test/test_block_comments.rb
@@ -37,15 +37,15 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
         r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
         assert_equal(
             [
-                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ "  Comment 2" ], [] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [] ]
             ],
             r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\n" )
         )
         assert_equal(
             [
-                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ "  Comment 2" ], [ "if false", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [ "if false", "end" ] ]
             ],
             r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\nif false\nend" )
         )
diff --git a/test/test_comment_normalization.rb b/test/test_comment_normalization.rb
new file mode 100644
index 0000000..9c6919d
--- /dev/null
+++ b/test/test_comment_normalization.rb
@@ -0,0 +1,24 @@
+require File.dirname(__FILE__) + '/helper'
+
+class RoccoCommentNormalization < Test::Unit::TestCase
+    def test_normal_comments
+        r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2a", "  Comment 2b" ], [] ]
+            ],
+            r.parse( "\"\"\"\n  Comment 1a\n  Comment 1b\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2a\n    Comment 2b\n\"\"\"\n" )
+        )
+    end
+    def test_single_line_comments
+        r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
+        assert_equal(
+            [
+                [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2a", "  Comment 2b" ], [] ]
+            ],
+            r.parse( "#   Comment 1a\n#   Comment 1b\ndef codeblock\nend\n#   Comment 2a\n#     Comment 2b\n" )
+        )
+    end
+end