From 0b392c1094f388de7cf75a561a123114995d6748 Mon Sep 17 00:00:00 2001 From: Mike West Date: Wed, 20 Oct 2010 14:34:09 +0200 Subject: [PATCH] Attempt to autodetect file language `pygmentize` 1.0+ has an `-N` option that attempts to match a file (via the extension) to a language lexer. If `pygmentize` is installed, we'll run it with this option to get a language. If no language is detected, `pygmentize -N` returns `text`. In that case, we'll first look for a user-provided language to use as a fallback. If no language was provided, highlight using `ruby` as a reasonable default. Closes issue #19. --- lib/rocco.rb | 43 ++++++++++++++++++++++++++++++++++++++----- test/rocco_test.rb | 24 ++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index 467f9ef..ea0293e 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -61,10 +61,18 @@ end # `Rocco.new` takes a source `filename`, an optional list of source filenames # for other documentation sources, an `options` hash, and an optional `block`. -# The `options` hash respects two members: `:language`, which specifies which -# Pygments lexer to use; and `:comment_chars`, which specifies the comment -# characters of the target language. The options default to `'ruby'` and `'#'`, -# respectively. +# The `options` hash respects three members: +# +# * `:language`: specifies which Pygments lexer to use if one can't be +# auto-detected from the filename. _Defaults to `ruby`_. +# +# * `:comment_chars`, which specifies the comment characters of the +# target language. _Defaults to `#`_. +# +# * `:template_file`, which specifies an external template file to use +# when rendering the final, highlighted file via Mustache. _Defaults +# to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_. +# # When `block` is given, it must read the contents of the file using whatever # means necessary and return it as a string. With no `block`, the file is read # to retrieve data. 
@@ -88,12 +96,37 @@ class Rocco @sources = sources @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?") @template_file = @options[:template_file] + + @options[:language] = detect_language() @sections = highlight(split(parse(@data))) end + # Returns `true` if `pygmentize` is available locally, `false` otherwise. + def pygmentize? + # Memoize the result; we'll call this a few times + @pygmentize ||= ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } + end + + # If `pygmentize` is available, we can use it to autodetect a file's + # language based on its filename. For filenames without extensions, or with + # extensions that `pygmentize` doesn't understand, it reports `text`. In + # that case, we'll fall back to the user-provided `:language` option. + def detect_language + default = @options[:language] + if pygmentize? + lang = %x[pygmentize -N #{@file}].strip! + ( !lang || lang == "text" ) ? default : lang + else + default + end + end + # The filename as given to `Rocco.new`. attr_reader :file + # The merged options hash + attr_reader :options + # A list of two-tuples representing each *section* of the source file. Each # item in the list has the form: `[docs_html, code_html]`, where both # elements are strings containing the documentation and source code HTML, @@ -173,7 +206,7 @@ class Rocco # `pygmentize(1)` or code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n") - if ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") } + if pygmentize? code_html = highlight_pygmentize(code_stream) else code_html = highlight_webservice(code_stream) diff --git a/test/rocco_test.rb b/test/rocco_test.rb index 58c0820..5d9d23d 100644 --- a/test/rocco_test.rb +++ b/test/rocco_test.rb @@ -82,6 +82,30 @@ class RoccoBasicTests < Test::Unit::TestCase end +class RoccoLanguageDetection < Test::Unit::TestCase + def test_basic_detection + r = Rocco.new( 'filename.py' ) { "" } + if r.pygmentize? 
+ assert_equal "python", r.detect_language(), "`detect_language()` should return the correct language" + assert_equal "python", r.options[:language], "`@options[:language]` should be set to the correct language" + end + end + def test_fallback_default + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" } + if r.pygmentize? + assert_equal "ruby", r.detect_language(), "`detect_language()` should return `ruby` when nothing else is detected" + assert_equal "ruby", r.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected" + end + end + def test_fallback_user + r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "c" } ) { "" } + if r.pygmentize? + assert_equal "c", r.detect_language(), "`detect_language()` should return the user's setting when nothing else is detected" + assert_equal "c", r.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected" + end + end +end + class RoccoIssueTests < Test::Unit::TestCase def test_issue07_incorrect_parsing_in_c_mode # Precursor to issue #13 below, Rocco incorrectly parsed C/C++