From b11543d382249b8eb50b250ed2fa8840b8683b60 Mon Sep 17 00:00:00 2001 From: Mike West Date: Mon, 22 Nov 2010 14:38:01 +0100 Subject: [PATCH] Normalizing leading space in comments That is: def function: """ This is a comment with _lots_ of leading space! OMG! """ pass Will parse into: [ [ [ "This is a comment", "with _lots_ of leading", "space! OMG!" ], ... ] ] --- lib/rocco.rb | 35 ++++++++++++++++++++++++++---- test/test_block_comments.rb | 8 +++---- test/test_comment_normalization.rb | 24 ++++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 test/test_comment_normalization.rb diff --git a/lib/rocco.rb b/lib/rocco.rb index 87b516b..f27e7bc 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -233,13 +233,15 @@ class Rocco # Parse the raw file data into a list of two-tuples. Each tuple has the # form `[docs, code]` where both elements are arrays containing the - # raw lines parsed from the input file. The first line is ignored if it - # is a shebang line. We also ignore the PEP 263 encoding information in - # python sourcefiles, and the similar ruby 1.9 syntax. + # raw lines parsed from the input file, comment characters stripped. def parse(data) sections = [] docs, code = [], [] lines = data.split("\n") + + # The first line is ignored if it is a shebang line. We also ignore the + # PEP 263 encoding information in python sourcefiles, and the similar ruby + # 1.9 syntax. lines.shift if lines[0] =~ /^\#\!/ lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language] @@ -290,7 +292,32 @@ class Rocco end end sections << [docs, code] if docs.any? || code.any? - sections + normalize_leading_spaces( sections ) + end + + # Normalizes documentation whitespace by checking for leading whitespace, + # removing it, and then removing the same amount of whitespace from each + # succeeding line. That is: + # + # def func(): + # """ + # Comment 1 + # Comment 2 + # """ + # print "omg!" 
+ # + # should yield a comment block of `Comment 1\nComment 2` and code of + # `def func():\n print "omg!"`. Sections with no doc lines pass through unchanged. + def normalize_leading_spaces( sections ) + sections.map do |section| + if section[ 0 ] && section[ 0 ].any? + leading_space = section[0][0].match( "^\s+" ) + if leading_space + section[0] = section[0].map{ |line| line.sub( /^#{leading_space.to_s}/, '' ) } + end + end + section + end end # Take the list of paired *sections* two-tuples and split into two diff --git a/test/test_block_comments.rb b/test/test_block_comments.rb index a61fceb..f814a49 100644 --- a/test/test_block_comments.rb +++ b/test/test_block_comments.rb @@ -37,15 +37,15 @@ class RoccoBlockCommentTest < Test::Unit::TestCase r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse` assert_equal( [ - [ [ " Comment 1" ], [ "def codeblock", "end" ] ], - [ [ " Comment 2" ], [] ] + [ [ "Comment 1" ], [ "def codeblock", "end" ] ], + [ [ "Comment 2" ], [] ] ], r.parse( "\"\"\"\n Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2\n\"\"\"\n" ) ) assert_equal( [ - [ [ " Comment 1" ], [ "def codeblock", "end" ] ], - [ [ " Comment 2" ], [ "if false", "end" ] ] + [ [ "Comment 1" ], [ "def codeblock", "end" ] ], + [ [ "Comment 2" ], [ "if false", "end" ] ] ], r.parse( "\"\"\"\n Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2\n\"\"\"\nif false\nend" ) ) diff --git a/test/test_comment_normalization.rb b/test/test_comment_normalization.rb new file mode 100644 index 0000000..9c6919d --- /dev/null +++ b/test/test_comment_normalization.rb @@ -0,0 +1,24 @@ +require File.dirname(__FILE__) + '/helper' + +class RoccoCommentNormalization < Test::Unit::TestCase + def test_normal_comments + r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse` + assert_equal( + [ + [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ], + [ [ "Comment 2a", " Comment 2b" ], [] ] + ], + r.parse( "\"\"\"\n Comment 1a\n 
Comment 1b\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2a\n Comment 2b\n\"\"\"\n" ) + ) + end + def test_single_line_comments + r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse` + assert_equal( + [ + [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ], + [ [ "Comment 2a", " Comment 2b" ], [] ] + ], + r.parse( "# Comment 1a\n# Comment 1b\ndef codeblock\nend\n# Comment 2a\n# Comment 2b\n" ) + ) + end +end