Specify encoding for Pygments
This closes issue #10, in theory, but I'm not completely happy with the behavior. The output for both UTF-8 and ISO-8859-1 sources is arguably correct, but I think it would be better to autodetect the file encoding and explicitly convert everything to UTF-8 on input. One option is the [`chardet` gem][gem], but I'm loath to add another dependency to Rocco...

[gem]: http://rubygems.org/gems/chardet/versions/0.9.0
This commit is contained in:
parent
38683a8cc2
commit
1b211bcc08
@ -194,7 +194,7 @@ class Rocco
|
||||
# then fork off a child process to write the input.
|
||||
def highlight_pygmentize(code)
|
||||
code_html = nil
|
||||
open("|pygmentize -l #{@options[:language]} -f html", 'r+') do |fd|
|
||||
open("|pygmentize -l #{@options[:language]} -O encoding=utf-8 -f html", 'r+') do |fd|
|
||||
pid =
|
||||
fork {
|
||||
fd.close_read
|
||||
|
1
test/fixtures/issue10.iso-8859-1.rb
vendored
Normal file
1
test/fixtures/issue10.iso-8859-1.rb
vendored
Normal file
@ -0,0 +1 @@
|
||||
# hello wörld
|
1
test/fixtures/issue10.rb
vendored
1
test/fixtures/issue10.rb
vendored
@ -1 +0,0 @@
|
||||
hello ąćęłńóśźż
|
1
test/fixtures/issue10.utf-8.rb
vendored
Normal file
1
test/fixtures/issue10.utf-8.rb
vendored
Normal file
@ -0,0 +1 @@
|
||||
# hello ąćęłńóśźż
|
@ -101,10 +101,20 @@ class RoccoIssueTests < Test::Unit::TestCase
|
||||
def test_issue10_utf8_processing
|
||||
# Rocco has issues with strange UTF-8 characters: need to explicitly set the encoding for Pygments
|
||||
# http://github.com/rtomayko/rocco/issues#issue/10
|
||||
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.rb" )
|
||||
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.utf-8.rb" )
|
||||
assert_equal(
|
||||
"<p>hello ąćęłńóśźż</p>\n",
|
||||
r.sections[0][0]
|
||||
r.sections[0][0],
|
||||
"UTF-8 input files ought behave correctly."
|
||||
)
|
||||
# and, just for grins, ensure that iso-8859-1 works too.
|
||||
# @TODO: Is this really the correct behavior? Converting text
|
||||
# to UTF-8 on the way out is probably preferable.
|
||||
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.iso-8859-1.rb" )
|
||||
assert_equal(
|
||||
"<p>hello w\366rld</p>\n",
|
||||
r.sections[0][0],
|
||||
"ISO-8859-1 input should probably also behave correctly."
|
||||
)
|
||||
end
|
||||
def test_issue12_css_octothorpe_classname_change
|
||||
|
Loading…
Reference in New Issue
Block a user