Merge branch 'blockcomments'
This commit is contained in:
commit
1262d50857
97
lib/rocco.rb
97
lib/rocco.rb
@ -107,12 +107,18 @@ class Rocco
|
||||
# to look around for comment characters to override the default.
|
||||
elsif @options[:language] != defaults[:language]
|
||||
@options[:comment_chars] = generate_comment_chars()
|
||||
|
||||
# If neither is true, then convert the default comment character string
|
||||
# into the comment_char syntax (we'll discuss that syntax in detail when
|
||||
# we get to `generate_comment_chars()` in a moment).
|
||||
else
|
||||
@options[:comment_chars] = { :single => @options[:comment_chars], :multi => nil }
|
||||
end
|
||||
|
||||
# Turn `:comment_chars` into a regex matching a series of spaces, the
|
||||
# `:comment_chars` string, and then an optional space. We'll use that
|
||||
# to detect single-line comments.
|
||||
@comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?")
|
||||
@comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars][:single]}\s?")
|
||||
|
||||
# `parse()` the file contents stored in `@data`. Run the result through `split()`
|
||||
# and that result through `highlight()` to generate the final section list.
|
||||
@ -205,18 +211,19 @@ class Rocco
|
||||
"c" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||
"coffee-script" => { :single => "#", :multi => { :start => "###", :middle => nil, :end => "###" } },
|
||||
"cpp" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||
"css" => { :single => nil, :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||
"java" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||
"js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||
"lua" => { :single => "--", :multi => nil },
|
||||
"python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } },
|
||||
"rb" => { :single => "#", :multi => nil },
|
||||
"rb" => { :single => "#", :multi => { :start => '=begin', :middle => nil, :end => '=end' } },
|
||||
"scheme" => { :single => ";;", :multi => nil },
|
||||
}
|
||||
|
||||
if comment_styles[language]
|
||||
comment_styles[language][:single]
|
||||
comment_styles[language]
|
||||
else
|
||||
@options[:comment_chars]
|
||||
{ :single => @options[:comment_chars], :multi => nil }
|
||||
end
|
||||
end
|
||||
end
|
||||
@ -226,29 +233,91 @@ class Rocco
|
||||
|
||||
# Parse the raw file data into a list of two-tuples. Each tuple has the
|
||||
# form `[docs, code]` where both elements are arrays containing the
|
||||
# raw lines parsed from the input file. The first line is ignored if it
|
||||
# is a shebang line. We also ignore the PEP 263 encoding information in
|
||||
# python sourcefiles, and the similar ruby 1.9 syntax.
|
||||
# raw lines parsed from the input file, comment characters stripped.
|
||||
def parse(data)
|
||||
sections = []
|
||||
docs, code = [], []
|
||||
lines = data.split("\n")
|
||||
|
||||
# The first line is ignored if it is a shebang line. We also ignore the
|
||||
# PEP 263 encoding information in python sourcefiles, and the similar ruby
|
||||
# 1.9 syntax.
|
||||
lines.shift if lines[0] =~ /^\#\!/
|
||||
lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
|
||||
|
||||
# To detect both block comments and single-line comments, we'll set
|
||||
# up a tiny state machine, and loop through each line of the file.
|
||||
# This requires an `in_comment_block` boolean, and a few regular
|
||||
# expressions for line tests.
|
||||
in_comment_block = false
|
||||
single_line_comment, block_comment_start, block_comment_mid, block_comment_end = nil, nil, nil, nil
|
||||
if not @options[:comment_chars][:single].nil?
|
||||
single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?")
|
||||
end
|
||||
if not @options[:comment_chars][:multi].nil?
|
||||
block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$")
|
||||
block_comment_end = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
|
||||
if @options[:comment_chars][:multi][:middle]
|
||||
block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?")
|
||||
end
|
||||
end
|
||||
lines.each do |line|
|
||||
case line
|
||||
when @comment_pattern
|
||||
# If we're currently in a comment block, check whether the line matches
|
||||
# the _end_ of a comment block.
|
||||
if in_comment_block
|
||||
if block_comment_end && line.match( block_comment_end )
|
||||
in_comment_block = false
|
||||
else
|
||||
docs << line.sub( block_comment_mid || '', '' )
|
||||
end
|
||||
# Otherwise, check whether the line matches the beginning of a block, or
|
||||
# a single-line comment all on its lonesome. In either case, if there's
|
||||
# code, start a new section
|
||||
else
|
||||
if block_comment_start && line.match( block_comment_start )
|
||||
in_comment_block = true
|
||||
if code.any?
|
||||
sections << [docs, code]
|
||||
docs, code = [], []
|
||||
end
|
||||
docs << line
|
||||
elsif single_line_comment && line.match( single_line_comment )
|
||||
if code.any?
|
||||
sections << [docs, code]
|
||||
docs, code = [], []
|
||||
end
|
||||
docs << line.sub( single_line_comment || '', '' )
|
||||
else
|
||||
code << line
|
||||
end
|
||||
end
|
||||
end
|
||||
sections << [docs, code] if docs.any? || code.any?
|
||||
sections
|
||||
normalize_leading_spaces( sections )
|
||||
end
|
||||
|
||||
# Normalizes documentation whitespace by checking for leading whitespace,
|
||||
# removing it, and then removing the same amount of whitespace from each
|
||||
# succeeding line. That is:
|
||||
#
|
||||
# def func():
|
||||
# """
|
||||
# Comment 1
|
||||
# Comment 2
|
||||
# """
|
||||
# print "omg!"
|
||||
#
|
||||
# should yield a comment block of `Comment 1\nComment 2` and code of
|
||||
# `def func():\n print "omg!"`
|
||||
def normalize_leading_spaces( sections )
|
||||
sections.map do |section|
|
||||
if section[ 0 ]
|
||||
leading_space = section[0][0].match( "^\s+" )
|
||||
if leading_space
|
||||
section[0] = section[0].map{ |line| line.sub( /^#{leading_space.to_s}/, '' ) }
|
||||
end
|
||||
end
|
||||
section
|
||||
end
|
||||
end
|
||||
|
||||
# Take the list of paired *sections* two-tuples and split into two
|
||||
@ -257,7 +326,7 @@ class Rocco
|
||||
def split(sections)
|
||||
docs_blocks, code_blocks = [], []
|
||||
sections.each do |docs,code|
|
||||
docs_blocks << docs.map { |line| line.sub(@comment_pattern, '') }.join("\n")
|
||||
docs_blocks << docs.join("\n")
|
||||
code_blocks << code.map do |line|
|
||||
tabs = line.match(/^(\t+)/)
|
||||
tabs ? line.sub(/^\t+/, ' ' * tabs.captures[0].length) : line
|
||||
@ -281,7 +350,7 @@ class Rocco
|
||||
|
||||
# Combine all code blocks into a single big stream and run through either
|
||||
# `pygmentize(1)` or <http://pygments.appspot.com>
|
||||
code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n")
|
||||
code_stream = code_blocks.join("\n\n#{@options[:comment_chars][:single]} DIVIDER\n\n")
|
||||
|
||||
if pygmentize?
|
||||
code_html = highlight_pygmentize(code_stream)
|
||||
@ -292,7 +361,7 @@ class Rocco
|
||||
# Do some post-processing on the pygments output to split things back
|
||||
# into sections and remove partial `<pre>` blocks.
|
||||
code_html = code_html.
|
||||
split(/\n*<span class="c.?">#{@options[:comment_chars]} DIVIDER<\/span>\n*/m).
|
||||
split(/\n*<span class="c.?">#{@options[:comment_chars][:single]} DIVIDER<\/span>\n*/m).
|
||||
map { |code| code.sub(/\n?<div class="highlight"><pre>/m, '') }.
|
||||
map { |code| code.sub(/\n?<\/pre><\/div>\n/m, '') }
|
||||
|
||||
|
@ -27,14 +27,14 @@ class RoccoBasicTests < Test::Unit::TestCase
|
||||
r = Rocco.new( 'test' ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1" ], [ "def codeblock", "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ]
|
||||
],
|
||||
r.parse( "# Comment 1\ndef codeblock\nend\n" )
|
||||
)
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "# Comment 1\ndef codeblock\n# Comment 2\nend\n" )
|
||||
)
|
||||
@ -47,7 +47,7 @@ class RoccoBasicTests < Test::Unit::TestCase
|
||||
[ "Comment 1" ],
|
||||
[ "def codeblock\nend" ]
|
||||
],
|
||||
r.split([ [ [ "# Comment 1" ], [ "def codeblock", "end" ] ] ])
|
||||
r.split([ [ [ "Comment 1" ], [ "def codeblock", "end" ] ] ])
|
||||
)
|
||||
assert_equal(
|
||||
[
|
||||
@ -55,8 +55,8 @@ class RoccoBasicTests < Test::Unit::TestCase
|
||||
[ "def codeblock", "end" ]
|
||||
],
|
||||
r.split( [
|
||||
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
] )
|
||||
)
|
||||
end
|
||||
|
63
test/test_block_comments.rb
Normal file
63
test/test_block_comments.rb
Normal file
@ -0,0 +1,63 @@
|
||||
require File.dirname(__FILE__) + '/helper'
|
||||
|
||||
class RoccoBlockCommentTest < Test::Unit::TestCase
|
||||
def test_basics
|
||||
r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ]
|
||||
],
|
||||
r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n" )
|
||||
)
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ]
|
||||
],
|
||||
r.parse( "/**\n * Comment 1a\n * Comment 1b\n */\ndef codeblock\nend\n" )
|
||||
)
|
||||
end
|
||||
def test_multiple_blocks
|
||||
r = Rocco.new( 'test', '', { :language => "c" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2" ], [] ]
|
||||
],
|
||||
r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\n" )
|
||||
)
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2" ], [ "if false", "end" ] ]
|
||||
],
|
||||
r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend" )
|
||||
)
|
||||
end
|
||||
def test_block_without_middle_character
|
||||
r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2" ], [] ]
|
||||
],
|
||||
r.parse( "\"\"\"\n Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2\n\"\"\"\n" )
|
||||
)
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2" ], [ "if false", "end" ] ]
|
||||
],
|
||||
r.parse( "\"\"\"\n Comment 1\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2\n\"\"\"\nif false\nend" )
|
||||
)
|
||||
end
|
||||
def test_language_without_single_line_comments
|
||||
r = Rocco.new( 'test', '', { :language => "css" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2" ], [ "if false", "end" ] ]
|
||||
],
|
||||
r.parse( "/**\n * Comment 1\n */\ndef codeblock\nend\n/**\n * Comment 2\n */\nif false\nend" )
|
||||
)
|
||||
end
|
||||
end
|
24
test/test_comment_normalization.rb
Normal file
24
test/test_comment_normalization.rb
Normal file
@ -0,0 +1,24 @@
|
||||
require File.dirname(__FILE__) + '/helper'
|
||||
|
||||
class RoccoCommentNormalization < Test::Unit::TestCase
|
||||
def test_normal_comments
|
||||
r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2a", " Comment 2b" ], [] ]
|
||||
],
|
||||
r.parse( "\"\"\"\n Comment 1a\n Comment 1b\n\"\"\"\ndef codeblock\nend\n\"\"\"\n Comment 2a\n Comment 2b\n\"\"\"\n" )
|
||||
)
|
||||
end
|
||||
def test_single_line_comments
|
||||
r = Rocco.new( 'test', '', { :language => "python" } ) { "" } # Generate throwaway instance so I can test `parse`
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "Comment 1a", "Comment 1b" ], [ "def codeblock", "end" ] ],
|
||||
[ [ "Comment 2a", " Comment 2b" ], [] ]
|
||||
],
|
||||
r.parse( "# Comment 1a\n# Comment 1b\ndef codeblock\nend\n# Comment 2a\n# Comment 2b\n" )
|
||||
)
|
||||
end
|
||||
end
|
@ -3,22 +3,22 @@ require File.dirname(__FILE__) + '/helper'
|
||||
class RoccoAutomaticCommentChars < Test::Unit::TestCase
|
||||
def test_basic_detection
|
||||
r = Rocco.new( 'filename.js' ) { "" }
|
||||
assert_equal "//", r.options[:comment_chars]
|
||||
assert_equal "//", r.options[:comment_chars][:single]
|
||||
end
|
||||
def test_fallback_language
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "js" } ) { "" }
|
||||
assert_equal "//", r.options[:comment_chars]
|
||||
assert_equal "//", r.options[:comment_chars][:single]
|
||||
end
|
||||
def test_fallback_default
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" }
|
||||
assert_equal "#", r.options[:comment_chars], "`:comment_chars` should be `#` when falling back to defaults."
|
||||
assert_equal "#", r.options[:comment_chars][:single], "`:comment_chars` should be `#` when falling back to defaults."
|
||||
end
|
||||
def test_fallback_user
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :comment_chars => "user" } ) { "" }
|
||||
assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
|
||||
assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
|
||||
end
|
||||
def test_fallback_user_with_unknown_language
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "not-a-language", :comment_chars => "user" } ) { "" }
|
||||
assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings."
|
||||
assert_equal "user", r.options[:comment_chars][:single], "`:comment_chars` should be the user's default when falling back to user-provided settings."
|
||||
end
|
||||
end
|
||||
|
@ -5,8 +5,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
|
||||
r = Rocco.new( 'filename.sh' ) { "" }
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||
"Shebang should be stripped when it appears as the first line."
|
||||
@ -16,8 +16,9 @@ class RoccoSkippableLines < Test::Unit::TestCase
|
||||
r = Rocco.new( 'filename.sh' ) { "" }
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
# @TODO: `#!/` shouldn't be recognized as a comment.
|
||||
[ [ "Comment 1", "!/usr/bin/env bash" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ),
|
||||
"Shebang shouldn't be stripped anywhere other than as the first line."
|
||||
@ -27,8 +28,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
|
||||
r = Rocco.new( 'filename.rb' ) { "" }
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||
@ -38,8 +39,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
|
||||
r = Rocco.new( 'filename.py' ) { "" }
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||
@ -49,8 +50,8 @@ class RoccoSkippableLines < Test::Unit::TestCase
|
||||
r = Rocco.new( 'filename.sh' ) { "" }
|
||||
assert_equal(
|
||||
[
|
||||
[ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "# Comment 2" ], [ "end" ] ]
|
||||
[ [ "encoding: utf-8", "Comment 1" ], [ "def codeblock" ] ],
|
||||
[ [ "Comment 2" ], [ "end" ] ]
|
||||
],
|
||||
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||
|
Loading…
Reference in New Issue
Block a user