Merge branch 'blockcomments'

2010-11-22 14:42:56 +01:00 · 2010-11-22 14:42:56 +01:00 · 1262d50857
commit 1262d50857
parent ba93d23634 b11543d382
6 changed files with 203 additions and 46 deletions
--- a/lib/rocco.rb
+++ b/lib/rocco.rb
@ -100,19 +100,25 @@ class Rocco
    if detect_language() != "text"
      # then assign the detected language to `:language`, and look for
      # comment characters based on that language
-      @options[:language] = detect_language()
+      @options[:language]         = detect_language()
      @options[:comment_chars]    = generate_comment_chars()
    # If we didn't detect a language, but the user provided one, use it
    # to look around for comment characters to override the default.
    elsif @options[:language] != defaults[:language]
      @options[:comment_chars]    = generate_comment_chars()
    # If neither is true, then convert the default comment character string
    # into the comment_char syntax (we'll discuss that syntax in detail when
    # we get to `generate_comment_chars()` in a moment).
    else
      @options[:comment_chars]    = { :single => @options[:comment_chars], :multi => nil }
    end
    # Turn `:comment_chars` into a regex matching a series of spaces, the 
    # `:comment_chars` string, and then an optional space.  We'll use that
    # to detect single-line comments.
-    @comment_pattern            = Regexp.new("^\\s*#{@options[:comment_chars]}\s?")
+    @comment_pattern            = Regexp.new("^\\s*#{@options[:comment_chars][:single]}\s?")
    # `parse()` the file contents stored in `@data`.  Run the result through `split()`
    # and that result through `highlight()` to generate the final section list.
@ -202,21 +208,22 @@ class Rocco
      language        = @options[:language]
      comment_styles  = {
        "bash"          =>  { :single => "#",   :multi => nil },
-        "c"             =>  { :single => "//",  :multi => { :start => "/**", :middle => "*", :end => "*/" } },
+        "c"             =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
-        "coffee-script" =>  { :single => "#",   :multi => { :start => "###", :middle => nil, :end => "###" } },
+        "coffee-script" =>  { :single => "#",   :multi => { :start => "###",    :middle => nil, :end => "###" } },
-        "cpp"           =>  { :single => "//",  :multi => { :start => "/**", :middle => "*", :end => "*/" } },
+        "cpp"           =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
-        "java"          =>  { :single => "//",  :multi => { :start => "/**", :middle => "*", :end => "*/" } },
+        "css"           =>  { :single => nil,   :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
-        "js"            =>  { :single => "//",  :multi => { :start => "/**", :middle => "*", :end => "*/" } },
+        "java"          =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
        "js"            =>  { :single => "//",  :multi => { :start => "/**",    :middle => "*", :end => "*/" } },
        "lua"           =>  { :single => "--",  :multi => nil },
-        "python"        =>  { :single => "#",   :multi => { :start => '"""', :middle => nil, :end => '"""' } },
+        "python"        =>  { :single => "#",   :multi => { :start => '"""',    :middle => nil, :end => '"""' } },
-        "rb"            =>  { :single => "#",   :multi => nil },
+        "rb"            =>  { :single => "#",   :multi => { :start => '=begin', :middle => nil, :end => '=end' } },
        "scheme"        =>  { :single => ";;",  :multi => nil },
      }
      if comment_styles[language]
-        comment_styles[language][:single]
+        comment_styles[language]
      else
-        @options[:comment_chars]
+        { :single => @options[:comment_chars], :multi => nil }
      end
    end
  end
@ -226,29 +233,91 @@ class Rocco
  # Parse the raw file data into a list of two-tuples. Each tuple has the
  # form `[docs, code]` where both elements are arrays containing the
-  # raw lines parsed from the input file. The first line is ignored if it
+  # raw lines parsed from the input file, comment characters stripped.
  # is a shebang line.  We also ignore the PEP 263 encoding information in
  # python sourcefiles, and the similar ruby 1.9 syntax.
  def parse(data)
    sections = []
    docs, code = [], []
    lines = data.split("\n")
    # The first line is ignored if it is a shebang line.  We also ignore the
    # PEP 263 encoding information in python sourcefiles, and the similar ruby
    # 1.9 syntax.
    lines.shift if lines[0] =~ /^\#\!/
    lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
    # To detect both block comments and single-line comments, we'll set
    # up a tiny state machine, and loop through each line of the file.
    # This requires an `in_comment_block` boolean, and a few regular 
    # expressions for line tests.
    in_comment_block    = false
    single_line_comment, block_comment_start, block_comment_mid, block_comment_end = nil, nil, nil, nil
    if not @options[:comment_chars][:single].nil?
      single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?")
    end
    if not @options[:comment_chars][:multi].nil?
      block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$")
      block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
      if @options[:comment_chars][:multi][:middle]
        block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?")
      end
    end
    lines.each do |line|
-      case line
+      # If we're currently in a comment block, check whether the line matches
-      when @comment_pattern
+      # the _end_ of a comment block.
-        if code.any?
+      if in_comment_block
-          sections << [docs, code]
+        if block_comment_end && line.match( block_comment_end )
-          docs, code = [], []
+          in_comment_block = false
        else
          docs << line.sub( block_comment_mid || '', '' )
        end
-        docs << line
+      # Otherwise, check whether the line matches the beginning of a block, or
      # a single-line comment all on its lonesome.  In either case, if there's
      # code, start a new section
      else
-        code << line
+        if block_comment_start && line.match( block_comment_start )
          in_comment_block = true
          if code.any?
            sections << [docs, code]
            docs, code = [], []
          end
        elsif single_line_comment && line.match( single_line_comment )
          if code.any?
            sections << [docs, code]
            docs, code = [], []
          end
          docs << line.sub( single_line_comment || '', '' )
        else
          code << line
        end
      end
    end
    sections << [docs, code] if docs.any? || code.any?
-    sections
+    normalize_leading_spaces( sections )
  end
  # Normalizes documentation whitespace by checking the first documentation
  # line of each section for leading whitespace (spaces or tabs), removing
  # it, and then removing the same prefix from each succeeding line that
  # starts with it.  Indentation beyond that prefix is preserved.  That is:
  #
  #     def func():
  #       """
  #         Comment 1
  #         Comment 2
  #       """
  #       print "omg!"
  #
  # should yield a comment block of `Comment 1\nComment 2` and code of
  # `def func():\n  print "omg!"`
  #
  # `sections` is a list of `[docs, code]` two-tuples, where `docs` is an
  # array of lines.  Returns a list holding the same section objects; a
  # section's `docs` entry is replaced with the stripped lines when its first
  # line was indented.  Code lines are never touched.  Sections whose docs are
  # empty (e.g. a file that opens with code) or `nil` pass through unchanged.
  def normalize_leading_spaces( sections )
    sections.map do |section|
      docs       = section[ 0 ]
      # `docs` may be an empty array, in which case there is no first line
      # to measure and nothing to normalize.
      first_line = docs && docs.first
      # A regex literal is used on purpose: inside a double-quoted string
      # `\s` is a plain space, which would silently ignore tab indentation.
      indent     = first_line && first_line[ /\A\s+/ ]
      if indent
        # Escape the captured prefix so it is matched literally.
        prefix = /\A#{Regexp.escape( indent )}/
        section[ 0 ] = docs.map { |line| line.sub( prefix, '' ) }
      end
      section
    end
  end
  # Take the list of paired *sections* two-tuples and split into two
@ -257,7 +326,7 @@ class Rocco
  def split(sections)
    docs_blocks, code_blocks = [], []
    sections.each do |docs,code|
-      docs_blocks << docs.map { |line| line.sub(@comment_pattern, '') }.join("\n")
+      docs_blocks << docs.join("\n")
      code_blocks << code.map do |line|
        tabs = line.match(/^(\t+)/)
        tabs ? line.sub(/^\t+/, '  ' * tabs.captures[0].length) : line
@ -281,7 +350,7 @@ class Rocco
    # Combine all code blocks into a single big stream and run through either
    # `pygmentize(1)` or <http://pygments.appspot.com>
-    code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n")
+    code_stream = code_blocks.join("\n\n#{@options[:comment_chars][:single]} DIVIDER\n\n")
    if pygmentize? 
      code_html = highlight_pygmentize(code_stream)
@ -292,7 +361,7 @@ class Rocco
    # Do some post-processing on the pygments output to split things back
    # into sections and remove partial `<pre>` blocks.
    code_html = code_html.
-      split(/\n*<span class="c.?">#{@options[:comment_chars]} DIVIDER<\/span>\n*/m).
+      split(/\n*<span class="c.?">#{@options[:comment_chars][:single]} DIVIDER<\/span>\n*/m).
      map { |code| code.sub(/\n?<div class="highlight"><pre>/m, '') }.
      map { |code| code.sub(/\n?<\/pre><\/div>\n/m, '') }
--- a/test/test_basics.rb
+++ b/test/test_basics.rb
@ -27,14 +27,14 @@ class RoccoBasicTests < Test::Unit::TestCase
        r = Rocco.new( 'test' ) { "" } # Generate throwaway instance so I can test `parse`
        assert_equal(
            [
-                [ [ "# Comment 1" ], [ "def codeblock", "end" ] ]
+                [ [ "Comment 1" ], [ "def codeblock", "end" ] ]
            ],
            r.parse( "# Comment 1\ndef codeblock\nend\n" )
        )
        assert_equal(
            [
-                [ [ "# Comment 1" ], [ "def codeblock" ] ],
+                [ [ "Comment 1" ], [ "def codeblock" ] ],
-                [ [ "# Comment 2" ], [ "end" ] ]
+                [ [ "Comment 2" ], [ "end" ] ]
            ],
            r.parse( "# Comment 1\ndef codeblock\n# Comment 2\nend\n" )
        )
@ -47,7 +47,7 @@ class RoccoBasicTests < Test::Unit::TestCase
                [ "Comment 1" ],
                [ "def codeblock\nend" ]
            ],
-            r.split([ [ [ "# Comment 1" ], [ "def codeblock", "end" ] ] ])
+            r.split([ [ [ "Comment 1" ], [ "def codeblock", "end" ] ] ])
        )
        assert_equal(
            [
@ -55,8 +55,8 @@ class RoccoBasicTests < Test::Unit::TestCase
                [ "def codeblock", "end" ]
            ],
            r.split( [
-                [ [ "# Comment 1" ], [ "def codeblock" ] ],
+                [ [ "Comment 1" ], [ "def codeblock" ] ],
-                [ [ "# Comment 2" ], [ "end" ] ]
+                [ [ "Comment 2" ], [ "end" ] ]
            ] )
        )
    end
--- a/test/test_block_comments.rb
+++ b/test/test_block_comments.rb
@ -0,0 +1,63 @@
 require File.dirname(__FILE__) + '/helper'
 class RoccoBlockCommentTest < Test::Unit::TestCase
    # A single block comment (one or two doc lines) followed by code, parsed
    # with the language set to "c".
    def test_basics
        rocco = throwaway_rocco( "c" )

        one_doc_line = "/**\n * Comment 1\n */\ndef codeblock\nend\n"
        assert_equal(
            [ [ [ "Comment 1" ], [ "def codeblock", "end" ] ] ],
            rocco.parse( one_doc_line )
        )

        two_doc_lines = "/**\n * Comment 1a\n * Comment 1b\n */\ndef codeblock\nend\n"
        assert_equal(
            [ [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ] ],
            rocco.parse( two_doc_lines )
        )
    end

    # Two block comments in one file: the second one either ends the file or
    # is followed by more code.
    def test_multiple_blocks
        rocco         = throwaway_rocco( "c" )
        first_section = [ [ "Comment 1" ], [ "def codeblock", "end" ] ]

        trailing_comment = "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\n"
        assert_equal(
            [ first_section, [ [ "Comment 2" ], [] ] ],
            rocco.parse( trailing_comment )
        )

        trailing_code = "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend"
        assert_equal(
            [ first_section, [ [ "Comment 2" ], [ "if false", "end" ] ] ],
            rocco.parse( trailing_code )
        )
    end

    # Same two scenarios with the language set to "python", where the block
    # body lines carry no per-line marker, only indentation.
    def test_block_without_middle_character
        rocco         = throwaway_rocco( "python" )
        first_section = [ [ "Comment 1" ], [ "def codeblock", "end" ] ]

        trailing_comment = "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\n"
        assert_equal(
            [ first_section, [ [ "Comment 2" ], [] ] ],
            rocco.parse( trailing_comment )
        )

        trailing_code = "\"\"\"\n  Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2\n\"\"\"\nif false\nend"
        assert_equal(
            [ first_section, [ [ "Comment 2" ], [ "if false", "end" ] ] ],
            rocco.parse( trailing_code )
        )
    end

    # Block comments parsed with the language set to "css".
    def test_language_without_single_line_comments
        rocco  = throwaway_rocco( "css" )
        source = "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend"

        assert_equal(
            [
                [ [ "Comment 1" ], [ "def codeblock", "end" ] ],
                [ [ "Comment 2" ], [ "if false", "end" ] ]
            ],
            rocco.parse( source )
        )
    end

    private

    # Builds a throwaway instance for the given language so `parse` can be
    # called directly on a string.
    def throwaway_rocco( language )
        Rocco.new( 'test', '', { :language => language } ) { "" }
    end
 end
--- a/test/test_comment_normalization.rb
+++ b/test/test_comment_normalization.rb
@ -0,0 +1,24 @@
 require File.dirname(__FILE__) + '/helper'
 class RoccoCommentNormalization < Test::Unit::TestCase
    # Expected result shared by both tests: the first doc line of each section
    # ends up flush left, and the extra two spaces on "Comment 2b" survive.
    NORMALIZED_SECTIONS = [
        [ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
        [ [ "Comment 2a", "  Comment 2b" ], [] ]
    ]

    # Indented doc lines inside `"""` blocks.
    def test_normal_comments
        source = "\"\"\"\n  Comment 1a\n  Comment 1b\n\"\"\"\ndef codeblock\nend\n\"\"\"\n  Comment 2a\n    Comment 2b\n\"\"\"\n"
        assert_equal( NORMALIZED_SECTIONS, python_rocco.parse( source ) )
    end

    # Indented doc text following `#` single-line comment markers.
    def test_single_line_comments
        source = "#   Comment 1a\n#   Comment 1b\ndef codeblock\nend\n#   Comment 2a\n#     Comment 2b\n"
        assert_equal( NORMALIZED_SECTIONS, python_rocco.parse( source ) )
    end

    private

    # Builds a throwaway instance with the language set to "python" so `parse`
    # can be called directly on a string.
    def python_rocco
        Rocco.new( 'test', '', { :language => "python" } ) { "" }
    end
 end
--- a/test/test_commentchar_detection.rb
+++ b/test/test_commentchar_detection.rb
@ -3,22 +3,22 @@ require File.dirname(__FILE__) + '/helper'
 class RoccoAutomaticCommentChars < Test::Unit::TestCase
    def test_basic_detection
        r = Rocco.new( 'filename.js' ) { "" }
-        assert_equal "//", r.options[:comment_chars]
+        assert_equal "//", r.options[:comment_chars][:single]
    end
    def test_fallback_language
        r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "js" } ) { "" }
-        assert_equal "//", r.options[:comment_chars]
+        assert_equal "//", r.options[:comment_chars][:single]
    end
    def test_fallback_default
        r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" }
-        assert_equal "#", r.options[:comment_chars], "`:comment_chars` should be `#` when falling back to defaults."
+        assert_equal "#", r.options[:comment_chars][:single], "`:comment_chars` should be `#` when falling back to defaults."
    end
    def test_fallback_user
        r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :comment_chars => "user" } ) { "" }
-        assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
+        assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
    end
    def test_fallback_user_with_unknown_language
        r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "not-a-language", :comment_chars => "user" } ) { "" }
-        assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
+        assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
    end
 end
--- a/test/test_skippable_lines.rb
+++ b/test/test_skippable_lines.rb
@ -5,8 +5,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
        [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 2" ], [ "end" ] ]
        ],
        r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
        "Shebang should be stripped when it appears as the first line."
@ -16,8 +16,9 @@ class RoccoSkippableLines < Test::Unit::TestCase
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
        [
-            [ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ],
+            # @TODO: `#!/` shouldn't be recognized as a comment.
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 1", "!/usr/bin/env bash" ], [ "def codeblock" ] ],
            [ [ "Comment 2" ], [ "end" ] ]
        ],
        r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ),
        "Shebang shouldn't be stripped anywhere other than as the first line."
@ -27,8 +28,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
    r = Rocco.new( 'filename.rb' ) { "" }
    assert_equal(
        [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 2" ], [ "end" ] ]
        ],
        r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
        "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
@ -38,8 +39,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
    r = Rocco.new( 'filename.py' ) { "" }
    assert_equal(
        [
-            [ [ "# Comment 1" ], [ "def codeblock" ] ],
+            [ [ "Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 2" ], [ "end" ] ]
        ],
        r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
        "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
@ -49,8 +50,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
        [
-            [ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ],
+            [ [ "encoding: utf-8", "Comment 1" ], [ "def codeblock" ] ],
-            [ [ "# Comment 2" ], [ "end" ] ]
+            [ [ "Comment 2" ], [ "end" ] ]
        ],
        r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
        "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."