From b11543d382249b8eb50b250ed2fa8840b8683b60 Mon Sep 17 00:00:00 2001
From: Mike West <mike@mikewest.org>
Date: Mon, 22 Nov 2010 14:38:01 +0100
Subject: [PATCH] Normalizing leading space in comments

That is:

    def function():
        """
            This is a comment
            with _lots_ of leading
            space!  OMG!
        """
        pass

Will parse into:

    [
        [
            [   "This is a comment",
                "with _lots_ of leading",
                "space!  OMG!"
            ],
            ...
        ]
    ]
---
 lib/rocco.rb                       | 35 ++++++++++++++++++++++++++----
 test/test_block_comments.rb        |  8 +++----
 test/test_comment_normalization.rb | 24 ++++++++++++++++++++
 3 files changed, 59 insertions(+), 8 deletions(-)
 create mode 100644 test/test_comment_normalization.rb

diff --git a/lib/rocco.rb b/lib/rocco.rb
index 87b516b..f27e7bc 100644
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@@ -233,13 +233,15 @@ class Rocco
 
   # Parse the raw file data into a list of two-tuples. Each tuple has the
   # form `[docs, code]` where both elements are arrays containing the
-  # raw lines parsed from the input file. The first line is ignored if it
-  # is a shebang line.  We also ignore the PEP 263 encoding information in
-  # python sourcefiles, and the similar ruby 1.9 syntax.
+  # raw lines parsed from the input file, comment characters stripped.
   def parse(data)
     sections = []
     docs, code = [], []
     lines = data.split("\n")
+
+    # The first line is ignored if it is a shebang line.  We also ignore the
+    # PEP 263 encoding information in python sourcefiles, and the similar ruby
+    # 1.9 syntax.
     lines.shift if lines[0] =~ /^\#\!/
     lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
 
@@ -290,7 +292,32 @@ class Rocco
       end
     end
     sections << [docs, code] if docs.any? || code.any?
-    sections
+    normalize_leading_spaces( sections )
+  end
+
+  # Normalizes documentation whitespace: measures the leading whitespace of
+  # the first line of each docs block, then strips that same prefix from
+  # every line in the block.  That is:
+  #
+  #     def func():
+  #       """
+  #         Comment 1
+  #         Comment 2
+  #       """
+  #       print "omg!"
+  #
+  # should yield a comment block of `Comment 1\nComment 2` and code of
+  # `def func():\n  print "omg!"`
+  def normalize_leading_spaces( sections )
+    sections.map do |section|
+      # Code-only sections have empty docs, so there may be no first line.
+      first_line = section[ 0 ] && section[ 0 ].first
+      leading_space = first_line && first_line[ /\A[ \t]+/ ]
+      if leading_space
+        section[0] = section[0].map{ |line| line.sub( /\A#{leading_space}/, '' ) }
+      end
+      section
+    end
+  end
 
   # Take the list of paired *sections* two-tuples and split into two
diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb
index a61fceb..f814a49 100644
--- a/test/test_block_comments.rb
+++ b/test/test_block_comments.rb
@@ -37,15 +37,15 @@ class RoccoBlockCommentTest < Test::Unit::TestCase
         r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
         assert_equal(
             [
-                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ "  Comment 2" ], [] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [] ]
             ],
             r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\n" )
         )
         assert_equal(
             [
-                [ [ "  Comment 1" ], [ "def codeblock", "end" ] ],
-                [ [ "  Comment 2" ], [ "if false", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
+                [ [ "Comment 2" ], [ "if false", "end" ] ]
             ],
             r.parse( "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\nif false\nend" )
         )
diff --git a/test/test_comment_normalization.rb b/test/test_comment_normalization.rb
new file mode 100644
index 0000000..9c6919d
--- /dev/null
+++ b/test/test_comment_normalization.rb
@@ -0,0 +1,24 @@
+require File.dirname(__FILE__) + '/helper'
+
+class RoccoCommentNormalization < Test::Unit::TestCase
+    # Block-comment docs lose the first line's indent; deeper lines keep the remainder.
+    def test_normal_comments
+        rocco = Rocco.new( 'test', '', { :language => "python" } ) { "" } # throwaway instance; only `parse` is exercised
+        source = "\"\"\"\n  Comment 1a\n  Comment 1b\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2a\n    Comment 2b\n\"\"\"\n"
+        expected = [
+            [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
+            [ [ "Comment 2a", "  Comment 2b" ], [] ]
+        ]
+        assert_equal( expected, rocco.parse( source ) )
+    end
+    # The same expectation holds for `#`-prefixed single-line comments.
+    def test_single_line_comments
+        rocco = Rocco.new( 'test', '', { :language => "python" } ) { "" } # throwaway instance; only `parse` is exercised
+        source = "#   Comment 1a\n#   Comment 1b\ndef codeblock\nend\n#   Comment 2a\n#     Comment 2b\n"
+        expected = [
+            [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
+            [ [ "Comment 2a", "  Comment 2b" ], [] ]
+        ]
+        assert_equal( expected, rocco.parse( source ) )
+    end
+end