From a43f0fc584c51e1282da22aaeb92aae88eb16975 Mon Sep 17 00:00:00 2001 From: Mike West Date: Wed, 20 Oct 2010 14:27:03 +0200 Subject: [PATCH 1/4] Typo: Closes issue #11 Thanks to Paul Chavard ( http://github.com/tchak ) for the report and fix. --- lib/rocco.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index ec62b39..467f9ef 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -216,7 +216,7 @@ class Rocco def highlight_webservice(code) Net::HTTP.post_form( URI.parse('http://pygments.appspot.com/'), - {'lang' => @options['language'], 'code' => code} + {'lang' => @options[:language], 'code' => code} ).body end end From 0b392c1094f388de7cf75a561a123114995d6748 Mon Sep 17 00:00:00 2001 From: Mike West Date: Wed, 20 Oct 2010 14:34:09 +0200 Subject: [PATCH 2/4] Attempt to autodetect file language `pygmentize` 1.0+ has an `-N` option that attempts to match a file (via the extension) to a language lexer. If `pygmentize` is installed, we'll run it with this option to get a language. If no language is detected, `pygmentize -N` returns `text`. In that case, we'll first look for a user-provided language to use as a fallback. If no language was provided, highlight using `ruby` as a reasonable default. Closes issue #19. --- lib/rocco.rb | 43 ++++++++++++++++++++++++++++++++++++++----- test/rocco_test.rb | 24 ++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index 467f9ef..ea0293e 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -61,10 +61,18 @@ end # `Rocco.new` takes a source `filename`, an optional list of source filenames # for other documentation sources, an `options` hash, and an optional `block`. -# The `options` hash respects two members: `:language`, which specifies which -# Pygments lexer to use; and `:comment_chars`, which specifies the comment -# characters of the target language. The options default to `'ruby'` and `'#'`, -# respectively. 
+# The `options` hash respects three members: +# +# * `:language`: specifies which Pygments lexer to use if one can't be +# auto-detected from the filename. _Defaults to `ruby`_. +# +# * `:comment_chars`, which specifies the comment characters of the +# target language. _Defaults to `#`_. +# +# * `:template_file`, which specifies an external template file to use +# when rendering the final, highlighted file via Mustache. _Defaults +# to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_. +# # When `block` is given, it must read the contents of the file using whatever # means necessary and return it as a string. With no `block`, the file is read # to retrieve data. @@ -88,12 +96,37 @@ class Rocco @sources = sources @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?") @template_file = @options[:template_file] + + @options[:language] = detect_language() @sections = highlight(split(parse(@data))) end + # Returns `true` if `pygmentize` is available locally, `false` otherwise. + def pygmentize? + # Memoize the result, we'll call this a few times + @pygmentize ||= ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } + end + + # If `pygmentize` is available, we can use it to autodetect a file's + # language based on its filename. Filenames without extensions, or with + # extensions that `pygmentize` doesn't understand will return `text`. In + # that case, we'll fallback to the user-provided `:language` option. + def detect_language + default = @options[:language] + if pygmentize? + lang = %x[pygmentize -N #{@file}].strip! + ( !lang || lang == "text" ) ? default : lang + else + default + end + end + # The filename as given to `Rocco.new`. attr_reader :file + # The merged options hash + attr_reader :options + # A list of two-tuples representing each *section* of the source file. 
Each # item in the list has the form: `[docs_html, code_html]`, where both # elements are strings containing the documentation and source code HTML, @@ -173,7 +206,7 @@ class Rocco # `pygmentize(1)` or code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n") - if ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } + if pygmentize? code_html = highlight_pygmentize(code_stream) else code_html = highlight_webservice(code_stream) diff --git a/test/rocco_test.rb b/test/rocco_test.rb index 58c0820..5d9d23d 100644 --- a/test/rocco_test.rb +++ b/test/rocco_test.rb @@ -82,6 +82,30 @@ class RoccoBasicTests < Test::Unit::TestCase end +class RoccoLanguageDetection < Test::Unit::TestCase + def test_basic_detection + r = Rocco.new( 'filename.py' ) { "" } + if r.pygmentize? + assert_equal "python", r.detect_language(), "`detect_language()` should return the correct language" + assert_equal "python", r.options[:language], "`@options[:language]` should be set to the correct language" + end + end + def test_fallback_default + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" } + if r.pygmentize? + assert_equal "ruby", r.detect_language(), "`detect_language()` should return `ruby` when nothing else is detected" + assert_equal "ruby", r.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected" + end + end + def test_fallback_user + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "c" } ) { "" } + if r.pygmentize? 
+ assert_equal "c", r.detect_language(), "`detect_language()` should return the user's setting when nothing else is detected" + assert_equal "c", r.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected" + end + end +end + class RoccoIssueTests < Test::Unit::TestCase def test_issue07_incorrect_parsing_in_c_mode # Precursor to issue #13 below, Rocco incorrectly parsed C/C++ From 020e8050bc5aece24163895cfab8a6b18e26b596 Mon Sep 17 00:00:00 2001 From: Mike West Date: Wed, 20 Oct 2010 17:07:14 +0200 Subject: [PATCH 3/4] Autopopulate comment_chars for known languages Adding comment characters for bash, c, c++, coffee script, java, javascript, lua, python, ruby, and scheme. Paving the way for block-comment parsing later on... Closes issue #20. --- lib/rocco.rb | 98 ++++++++++++++++++++++++++++++++++++++-------- test/rocco_test.rb | 23 +++++++++++ 2 files changed, 105 insertions(+), 16 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index ea0293e..3082173 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -73,14 +73,16 @@ end # when rendering the final, highlighted file via Mustache. _Defaults # to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_. # -# When `block` is given, it must read the contents of the file using whatever -# means necessary and return it as a string. With no `block`, the file is read -# to retrieve data. class Rocco VERSION = '0.5' def initialize(filename, sources=[], options={}, &block) - @file = filename + @file = filename + @sources = sources + + # When `block` is given, it must read the contents of the file using + # whatever means necessary and return it as a string. With no `block`, + # the file is read to retrieve data. @data = if block_given? 
yield @@ -93,31 +95,95 @@ class Rocco :template_file => nil } @options = defaults.merge(options) - @sources = sources - @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?") @template_file = @options[:template_file] - @options[:language] = detect_language() + # If we detect a language + if detect_language() != "text" + # then assign the detected language to `:language` + @options[:language] = detect_language() + # and look for some comment characters + @options[:comment_chars] = generate_comment_chars() + # If we didn't detect a language, but the user provided one, use it + # to look around for comment characters to override the default. + elsif @options[:language] != defaults[:language] + @options[:comment_chars] = generate_comment_chars() + end + @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?") + @sections = highlight(split(parse(@data))) end # Returns `true` if `pygmentize` is available locally, `false` otherwise. def pygmentize? # Memoize the result, we'll call this a few times - @pygmentize ||= ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } + @_pygmentize ||= ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } end # If `pygmentize` is available, we can use it to autodetect a file's # language based on its filename. Filenames without extensions, or with - # extensions that `pygmentize` doesn't understand will return `text`. In - # that case, we'll fallback to the user-provided `:language` option. + # extensions that `pygmentize` doesn't understand will return `text`. + # We'll also return `text` if `pygmentize` isn't available. + # + # We'll memoize the result, as we'll call this a few times. def detect_language - default = @options[:language] - if pygmentize? - lang = %x[pygmentize -N #{@file}].strip! - ( !lang || lang == "text" ) ? default : lang - else - default + @_language ||= begin + if pygmentize? + lang = %x[pygmentize -N #{@file}].strip! 
+ else + "text" + end + end + end + + # Given a file's language, we should be able to autopopulate the + # `comment_chars` variables for single-line comments. If we don't + # have comment characters on record for a given language, we'll + # use the user-provided `:comment_chars` option (which defaults to + # `#`). + # + # Comment characters are listed as: + # + # { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } } + # + # `:single` denotes the leading character of a single-line comment. + # `[:multi][:start]` denotes the string that should appear alone on a + # line of code to begin a block of documentation. `[:multi][:middle]` + # denotes the leading character of block comment content, and + # `[:multi][:end]` is the string that ought to appear alone on a line to + # close a block of documentation. That is: + # + # /** [:multi][:start] + # * [:multi][:middle] + # * [:multi][:middle] + # * [:multi][:middle] + # */ [:multi][:end] + # + # If a language only has one type of comment, the missing type + # should be assigned `nil`. + # + # At the moment, we're only returning `:single`. Consider this + # groundwork for block comment parsing. 
+ def generate_comment_chars + @_commentchar ||= begin + language = @options[:language] + comment_styles = { + "bash" => { :single => "#", :multi => nil }, + "c" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "coffee-script" => { :single => "#", :multi => { :start => "###", :middle => nil, :end => "###" } }, + "cpp" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "java" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, + "lua" => { :single => "--", :multi => nil }, + "python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } }, + "ruby" => { :single => "#", :multi => nil }, + "scheme" => { :single => ";;", :multi => nil }, + } + + if comment_styles[language] + comment_styles[language][:single] + else + @options[:comment_chars] + end end end diff --git a/test/rocco_test.rb b/test/rocco_test.rb index 5d9d23d..67c1da3 100644 --- a/test/rocco_test.rb +++ b/test/rocco_test.rb @@ -106,6 +106,29 @@ class RoccoLanguageDetection < Test::Unit::TestCase end end +class RoccoAutomaticCommentChars < Test::Unit::TestCase + def test_basic_detection + r = Rocco.new( 'filename.js' ) { "" } + assert_equal "//", r.options[:comment_chars] + end + def test_fallback_language + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "js" } ) { "" } + assert_equal "//", r.options[:comment_chars] + end + def test_fallback_default + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" } + assert_equal "#", r.options[:comment_chars], "`:comment_chars` should be `#` when falling back to defaults." 
+ end + def test_fallback_user + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :comment_chars => "user" } ) { "" } + assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings." + end + def test_fallback_user_with_unknown_language + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "not-a-language", :comment_chars => "user" } ) { "" } + assert_equal "user", r.options[:comment_chars], "`:comment_chars` should be the user's default when falling back to user-provided settings." + end +end + class RoccoIssueTests < Test::Unit::TestCase def test_issue07_incorrect_parsing_in_c_mode # Precursor to issue #13 below, Rocco incorrectly parsed C/C++ From 185da24fc3a00e598d5c2ab0b646321338b5ccca Mon Sep 17 00:00:00 2001 From: Mike West Date: Wed, 20 Oct 2010 17:07:32 +0200 Subject: [PATCH 4/4] Cleaning up indent spacing in test file. --- test/rocco_test.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/rocco_test.rb b/test/rocco_test.rb index 67c1da3..58597a8 100644 --- a/test/rocco_test.rb +++ b/test/rocco_test.rb @@ -86,22 +86,22 @@ class RoccoLanguageDetection < Test::Unit::TestCase def test_basic_detection r = Rocco.new( 'filename.py' ) { "" } if r.pygmentize? - assert_equal "python", r.detect_language(), "`detect_language()` should return the correct language" - assert_equal "python", r.options[:language], "`@options[:language]` should be set to the correct language" + assert_equal "python", r.detect_language(), "`detect_language()` should return the correct language" + assert_equal "python", r.options[:language], "`@options[:language]` should be set to the correct language" end end def test_fallback_default r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" } if r.pygmentize? 
- assert_equal "ruby", r.detect_language(), "`detect_language()` should return `ruby` when nothing else is detected" - assert_equal "ruby", r.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected" + assert_equal "text", r.detect_language(), "`detect_language()` should return `text` when nothing else is detected" + assert_equal "ruby", r.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected" end end def test_fallback_user r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "c" } ) { "" } if r.pygmentize? - assert_equal "c", r.detect_language(), "`detect_language()` should return the user's setting when nothing else is detected" - assert_equal "c", r.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected" + assert_equal "text", r.detect_language(), "`detect_language()` should return `text` nothing else is detected" + assert_equal "c", r.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected" end end end