Attempt to autodetect file language
`pygmentize` 1.0+ has an `-N` option that attempts to match a file (via the extension) to a language lexer. If `pygmentize` is installed, we'll run it with this option to get a language. If no language is detected, `pygmentize -N` returns `text`. In that case, we'll first look for a user-provided language to use as a fallback. If no language was provided, highlight using `ruby` as a reasonable default. Closes issue #19.
This commit is contained in:
parent
a43f0fc584
commit
0b392c1094
43
lib/rocco.rb
43
lib/rocco.rb
@ -61,10 +61,18 @@ end
|
|||||||
|
|
||||||
# `Rocco.new` takes a source `filename`, an optional list of source filenames
|
# `Rocco.new` takes a source `filename`, an optional list of source filenames
|
||||||
# for other documentation sources, an `options` hash, and an optional `block`.
|
# for other documentation sources, an `options` hash, and an optional `block`.
|
||||||
# The `options` hash respects two members: `:language`, which specifies which
|
# The `options` hash respects three members:
|
||||||
# Pygments lexer to use; and `:comment_chars`, which specifies the comment
|
#
|
||||||
# characters of the target language. The options default to `'ruby'` and `'#'`,
|
# * `:language`: specifies which Pygments lexer to use if one can't be
|
||||||
# respectively.
|
# auto-detected from the filename. _Defaults to `ruby`_.
|
||||||
|
#
|
||||||
|
# * `:comment_chars`, which specifies the comment characters of the
|
||||||
|
# target language. _Defaults to `#`_.
|
||||||
|
#
|
||||||
|
# * `:template_file`, which specifies an external template file to use
|
||||||
|
# when rendering the final, highlighted file via Mustache. _Defaults
|
||||||
|
# to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_.
|
||||||
|
#
|
||||||
# When `block` is given, it must read the contents of the file using whatever
|
# When `block` is given, it must read the contents of the file using whatever
|
||||||
# means necessary and return it as a string. With no `block`, the file is read
|
# means necessary and return it as a string. With no `block`, the file is read
|
||||||
# to retrieve data.
|
# to retrieve data.
|
||||||
@ -88,12 +96,37 @@ class Rocco
|
|||||||
@sources = sources
|
@sources = sources
|
||||||
@comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?")
|
@comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars]}\s?")
|
||||||
@template_file = @options[:template_file]
|
@template_file = @options[:template_file]
|
||||||
|
|
||||||
|
@options[:language] = detect_language()
|
||||||
@sections = highlight(split(parse(@data)))
|
@sections = highlight(split(parse(@data)))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Returns `true` if a `pygmentize` executable is found in one of the
# directories listed in `ENV['PATH']`, `false` otherwise.
#
# The answer is cached in `@pygmentize` after the first call, since we'll
# ask this a few times. The cache test is "has a value been stored yet?"
# rather than "is the stored value truthy?", so a negative answer is
# remembered too and `PATH` isn't rescanned on every call when `pygmentize`
# is missing. An unset `PATH` is treated as an empty search path.
def pygmentize?
  if @pygmentize.nil?
    search_dirs = ENV['PATH'].to_s.split(File::PATH_SEPARATOR)
    @pygmentize = search_dirs.any? { |dir| executable?("#{dir}/pygmentize") }
  end
  @pygmentize
end
|
||||||
|
|
||||||
|
# If `pygmentize` is available, we can use it to autodetect a file's
# language based on its filename. Filenames without extensions, or with
# extensions that `pygmentize` doesn't understand will return `text`. In
# that case, we'll fall back to the user-provided `:language` option.
#
# The filename is handed to `pygmentize` as its own argument rather than
# being spliced into a shell command line, so names containing spaces or
# shell metacharacters are looked up verbatim and never interpreted by a
# shell.
#
# Returns the detected lexer name, or `@options[:language]` when
# `pygmentize` isn't available, prints nothing, or reports `text`.
def detect_language
  default = @options[:language]
  return default unless pygmentize?

  # Use the non-destructive `strip`: `strip!` returns `nil` when there is
  # nothing to trim, which would throw away a perfectly good answer.
  lang = IO.popen(['pygmentize', '-N', @file.to_s], &:read).to_s.strip
  lang.empty? || lang == 'text' ? default : lang
end
|
||||||
|
|
||||||
# The filename as given to `Rocco.new`.
|
# The filename as given to `Rocco.new`.
|
||||||
attr_reader :file
|
attr_reader :file
|
||||||
|
|
||||||
|
# The merged options hash
|
||||||
|
attr_reader :options
|
||||||
|
|
||||||
# A list of two-tuples representing each *section* of the source file. Each
|
# A list of two-tuples representing each *section* of the source file. Each
|
||||||
# item in the list has the form: `[docs_html, code_html]`, where both
|
# item in the list has the form: `[docs_html, code_html]`, where both
|
||||||
# elements are strings containing the documentation and source code HTML,
|
# elements are strings containing the documentation and source code HTML,
|
||||||
@ -173,7 +206,7 @@ class Rocco
|
|||||||
# `pygmentize(1)` or <http://pygments.appspot.com>
|
# `pygmentize(1)` or <http://pygments.appspot.com>
|
||||||
code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n")
|
code_stream = code_blocks.join("\n\n#{@options[:comment_chars]} DIVIDER\n\n")
|
||||||
|
|
||||||
if ENV['PATH'].split(':').any? { |dir| executable?("#{dir}/pygmentize") }
|
if pygmentize?
|
||||||
code_html = highlight_pygmentize(code_stream)
|
code_html = highlight_pygmentize(code_stream)
|
||||||
else
|
else
|
||||||
code_html = highlight_webservice(code_stream)
|
code_html = highlight_webservice(code_stream)
|
||||||
|
@ -82,6 +82,30 @@ class RoccoBasicTests < Test::Unit::TestCase
|
|||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Checks how `Rocco` picks the highlighting language for a file. Every case
# hands `Rocco.new` a block returning an empty string, so no file is read
# from disk. Each case asserts nothing when `pygmentize?` reports that
# `pygmentize` isn't available.
class RoccoLanguageDetection < Test::Unit::TestCase
  # An extension `pygmentize` recognises yields the matching language.
  def test_basic_detection
    rocco = Rocco.new('filename.py') { "" }
    return unless rocco.pygmentize?

    assert_equal "python", rocco.detect_language, "`detect_language()` should return the correct language"
    assert_equal "python", rocco.options[:language], "`@options[:language]` should be set to the correct language"
  end

  # An unrecognised extension with no user-supplied language yields `ruby`.
  def test_fallback_default
    rocco = Rocco.new('filename.an_extension_with_no_meaning_whatsoever') { "" }
    return unless rocco.pygmentize?

    assert_equal "ruby", rocco.detect_language, "`detect_language()` should return `ruby` when nothing else is detected"
    assert_equal "ruby", rocco.options[:language], "`@options[:language]` should be set to `ruby` when nothing else is detected"
  end

  # An unrecognised extension with a user-supplied `:language` yields that
  # language.
  def test_fallback_user
    user_options = { :language => "c" }
    rocco = Rocco.new('filename.an_extension_with_no_meaning_whatsoever', '', user_options) { "" }
    return unless rocco.pygmentize?

    assert_equal "c", rocco.detect_language, "`detect_language()` should return the user's setting when nothing else is detected"
    assert_equal "c", rocco.options[:language], "`@options[:language]` should be set to the user's setting when nothing else is detected"
  end
end
|
||||||
|
|
||||||
class RoccoIssueTests < Test::Unit::TestCase
|
class RoccoIssueTests < Test::Unit::TestCase
|
||||||
def test_issue07_incorrect_parsing_in_c_mode
|
def test_issue07_incorrect_parsing_in_c_mode
|
||||||
# Precursor to issue #13 below, Rocco incorrectly parsed C/C++
|
# Precursor to issue #13 below, Rocco incorrectly parsed C/C++
|
||||||
|
Loading…
Reference in New Issue
Block a user