Specify encoding for Pygments
This closes issue #10, in theory, but I'm not completely happy with the behavior. The output for both UTF-8 and ISO-8859-1 sources is arguably correct, but I think it'd be better to do some autodetecting of the file encoding, and explicitly convert everything to UTF-8 on input. One option is the [`chardet` gem][gem], but I'm loath to add another dependency to Rocco...

[gem]: http://rubygems.org/gems/chardet/versions/0.9.0
This commit is contained in:
parent
38683a8cc2
commit
1b211bcc08
@ -194,7 +194,7 @@ class Rocco
|
|||||||
# then fork off a child process to write the input.
|
# then fork off a child process to write the input.
|
||||||
def highlight_pygmentize(code)
|
def highlight_pygmentize(code)
|
||||||
code_html = nil
|
code_html = nil
|
||||||
open("|pygmentize -l #{@options[:language]} -f html", 'r+') do |fd|
|
open("|pygmentize -l #{@options[:language]} -O encoding=utf-8 -f html", 'r+') do |fd|
|
||||||
pid =
|
pid =
|
||||||
fork {
|
fork {
|
||||||
fd.close_read
|
fd.close_read
|
||||||
|
1
test/fixtures/issue10.iso-8859-1.rb
vendored
Normal file
1
test/fixtures/issue10.iso-8859-1.rb
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
# hello wörld
|
1
test/fixtures/issue10.rb
vendored
1
test/fixtures/issue10.rb
vendored
@ -1 +0,0 @@
|
|||||||
hello ąćęłńóśźż
|
|
1
test/fixtures/issue10.utf-8.rb
vendored
Normal file
1
test/fixtures/issue10.utf-8.rb
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
# hello ąćęłńóśźż
|
@ -101,10 +101,20 @@ class RoccoIssueTests < Test::Unit::TestCase
|
|||||||
def test_issue10_utf8_processing
|
def test_issue10_utf8_processing
|
||||||
# Rocco has issues with strange UTF-8 characters: need to explicitly set the encoding for Pygments
|
# Rocco has issues with strange UTF-8 characters: need to explicitly set the encoding for Pygments
|
||||||
# http://github.com/rtomayko/rocco/issues#issue/10
|
# http://github.com/rtomayko/rocco/issues#issue/10
|
||||||
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.rb" )
|
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.utf-8.rb" )
|
||||||
assert_equal(
|
assert_equal(
|
||||||
"<p> hello ąćęłńóśźż</p>\n",
|
"<p>hello ąćęłńóśźż</p>\n",
|
||||||
r.sections[0][0]
|
r.sections[0][0],
|
||||||
|
"UTF-8 input files ought behave correctly."
|
||||||
|
)
|
||||||
|
# and, just for grins, ensure that iso-8859-1 works too.
|
||||||
|
# @TODO: Is this really the correct behavior? Converting text
|
||||||
|
# to UTF-8 on the way out is probably preferable.
|
||||||
|
r = Rocco.new( File.dirname(__FILE__) + "/fixtures/issue10.iso-8859-1.rb" )
|
||||||
|
assert_equal(
|
||||||
|
"<p>hello w\366rld</p>\n",
|
||||||
|
r.sections[0][0],
|
||||||
|
"ISO-8859-1 input should probably also behave correctly."
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
def test_issue12_css_octothorpe_classname_change
|
def test_issue12_css_octothorpe_classname_change
|
||||||
|
Loading…
Reference in New Issue
Block a user