Attempt to autodetect file language
`pygmentize` 1.0+ has an `-N` option that attempts to match a file (via the extension) to a language lexer. If `pygmentize` is installed, we'll run it with this option to get a language. If no language is detected, `pygmentize -N` returns `text`. In that case, we'll first look for a user-provided language to use as a fallback. If no language was provided, highlight using `ruby` as a reasonable default. Closes issue #19.
This commit is contained in:
parent
a43f0fc584
commit
0b392c1094
43
lib/rocco.rb
43
lib/rocco.rb
@ -61,10 +61,18 @@ end
|
||||
|
||||
# `Rocco.new` takes a source `filename`, an optional list of source filenames
|
||||
# for other documentation sources, an `options` hash, and an optional `block`.
|
||||
# The `options` hash respects two members: `:language`, which specifies which
|
||||
# Pygments lexer to use; and `:comment_chars`, which specifies the comment
|
||||
# characters of the target language. The options default to `'ruby'` and `'#'`,
|
||||
# respectively.
|
||||
# The `options` hash respects three members:
|
||||
#
|
||||
# * `:language`: specifies which Pygments lexer to use if one can't be
|
||||
# auto-detected from the filename. _Defaults to `ruby`_.
|
||||
#
|
||||
# * `:comment_chars`, which specifies the comment characters of the
|
||||
# target language. _Defaults to `#`_.
|
||||
#
|
||||
# * `:template_file`, which specifies a external template file to use
|
||||
# when rendering the final, highlighted file via Mustache. _Defaults
|
||||
# to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_.
|
||||
#
|
||||
# When `block` is given, it must read the contents of the file using whatever
|
||||
# means necessary and return it as a string. With no `block`, the file is read
|
||||
# to retrieve data.
|
||||
@ -88,12 +96,37 @@ class Rocco
|
||||
@sources = sources
|
||||
@comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?")
|
||||
@template_file = @options[:template_file]
|
||||
|
||||
@options[:language] = detect_language()
|
||||
@sections = highlight(split(parse(@data)))
|
||||
end
|
||||
|
||||
# Returns `true` if `pygmentize` is available locally, `false` otherwise.
|
||||
def pygmentize?
|
||||
# Memoize the result, we'll call this a few times
|
||||
@pygmentize ||= ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") }
|
||||
end
|
||||
|
||||
# If `pygmentize` is available, we can use it to autodetect a file's
|
||||
# language based on its filename. Filenames without extensions, or with
|
||||
# extensions that `pygmentize` doesn't understand will return `text`. In
|
||||
# that case, we'll fallback to the user-provided `:language` option.
|
||||
def detect_language
|
||||
default = @options[:language]
|
||||
if pygmentize?
|
||||
lang = %x[pygmentize -N #{@file}].strip!
|
||||
( !lang || lang == "text" ) ? default : lang
|
||||
else
|
||||
default
|
||||
end
|
||||
end
|
||||
|
||||
# The filename as given to `Rocco.new`.
|
||||
attr_reader :file
|
||||
|
||||
# The merged options array
|
||||
attr_reader :options
|
||||
|
||||
# A list of two-tuples representing each *section* of the source file. Each
|
||||
# item in the list has the form: `[docs_html, code_html]`, where both
|
||||
# elements are strings containing the documentation and source code HTML,
|
||||
@ -173,7 +206,7 @@ class Rocco
|
||||
# `pygmentize(1)` or <http://pygments.appspot.com>
|
||||
code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n")
|
||||
|
||||
if ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") }
|
||||
if pygmentize?
|
||||
code_html = highlight_pygmentize(code_stream)
|
||||
else
|
||||
code_html = highlight_webservice(code_stream)
|
||||
|
@ -82,6 +82,30 @@ class RoccoBasicTests < Test::Unit::TestCase
|
||||
|
||||
end
|
||||
|
||||
class RoccoLanguageDetection < Test::Unit::TestCase
|
||||
def test_basic_detection
|
||||
r = Rocco.new( 'filename.py' ) { "" }
|
||||
if r.pygmentize?
|
||||
assert_equal "python", r.detect_language(), "`detect_language()` should return the correct language"
|
||||
assert_equal "python", r.options[:language], "`@options[:language]` should be set to the correct language"
|
||||
end
|
||||
end
|
||||
def test_fallback_default
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever' ) { "" }
|
||||
if r.pygmentize?
|
||||
assert_equal "ruby", r.detect_language(), "`detect_language()` should return `ruby` when nothing else is detected"
|
||||
assert_equal "ruby", r.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected"
|
||||
end
|
||||
end
|
||||
def test_fallback_user
|
||||
r = Rocco.new( 'filename.an_extension_with_no_meaning_whatsoever', '', { :language => "c" } ) { "" }
|
||||
if r.pygmentize?
|
||||
assert_equal "c", r.detect_language(), "`detect_language()` should return the user's setting when nothing else is detected"
|
||||
assert_equal "c", r.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class RoccoIssueTests < Test::Unit::TestCase
|
||||
def test_issue07_incorrect_parsing_in_c_mode
|
||||
# Precursor to issue #13 below, Rocco incorrectly parsed C/C++
|
||||
|
Loading…
Reference in New Issue
Block a user