Skipping Python/Ruby 1.9 source encoding

In the same way that it makes sense to skip the shebang (#!) line in
scripts, it makes sense to skip the encoding definition in Python files
(described by [PEP 263][p]) and Ruby 1.9 files (similar enough syntax
that it's not worth worrying about the difference).

[p]: http://www.python.org/dev/peps/pep-0263/
This commit is contained in:
Mike West 2010-10-21 13:45:47 +02:00
parent a4d0e41413
commit e506c5172a
2 changed files with 61 additions and 2 deletions

View File

@ -209,7 +209,7 @@ class Rocco
"js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, "js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
"lua" => { :single => "--", :multi => nil }, "lua" => { :single => "--", :multi => nil },
"python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } }, "python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } },
"ruby" => { :single => "#", :multi => nil }, "rb" => { :single => "#", :multi => nil },
"scheme" => { :single => ";;", :multi => nil }, "scheme" => { :single => ";;", :multi => nil },
} }
@ -227,12 +227,14 @@ class Rocco
# Parse the raw file data into a list of two-tuples. Each tuple has the # Parse the raw file data into a list of two-tuples. Each tuple has the
# form `[docs, code]` where both elements are arrays containing the # form `[docs, code]` where both elements are arrays containing the
# raw lines parsed from the input file. The first line is ignored if it # raw lines parsed from the input file. The first line is ignored if it
# is a shebang line. # is a shebang line. We also ignore the PEP 263 encoding information in
# python sourcefiles, and the similar ruby 1.9 syntax.
def parse(data) def parse(data)
sections = [] sections = []
docs, code = [], [] docs, code = [], []
lines = data.split("\n") lines = data.split("\n")
lines.shift if lines[0] =~ /^\#\!/ lines.shift if lines[0] =~ /^\#\!/
lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
lines.each do |line| lines.each do |line|
case line case line
when @comment_pattern when @comment_pattern

View File

@ -82,6 +82,63 @@ class RoccoBasicTests < Test::Unit::TestCase
end end
# Tests for the lines that `Rocco#parse` drops from the top of a source
# file before splitting it into `[docs, code]` sections: a leading shebang
# line and, for python and ruby sources, the encoding declaration that
# follows it.
#
# Every test builds a `Rocco` instance from a bare filename and a block
# returning an empty string, then feeds the source text straight to `parse`.
# NOTE(review): presumably the block stands in for the file contents so no
# file is read from disk -- confirm against `Rocco#initialize`.
class RoccoSkippableLines < Test::Unit::TestCase
  # A shebang on the very first line is removed from the parsed output.
  def test_shebang_first_line
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
      [
        [ [ "# Comment 1" ], [ "def codeblock" ] ],
        [ [ "# Comment 2" ], [ "end" ] ]
      ],
      r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
      "Shebang should be stripped when it appears as the first line."
    )
  end

  # A shebang-looking line anywhere else is kept as ordinary documentation.
  def test_shebang_in_content
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
      [
        [ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ],
        [ [ "# Comment 2" ], [ "end" ] ]
      ],
      r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ),
      "Shebang shouldn't be stripped anywhere other than as the first line."
    )
  end

  # In a `.rb` file the encoding comment following the shebang is removed.
  def test_encoding_in_ruby
    r = Rocco.new( 'filename.rb' ) { "" }
    assert_equal(
      [
        [ [ "# Comment 1" ], [ "def codeblock" ] ],
        [ [ "# Comment 2" ], [ "end" ] ]
      ],
      r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
      "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a ruby document."
    )
  end

  # In a `.py` file the PEP 263 encoding comment following the shebang is
  # removed.
  def test_encoding_in_python
    r = Rocco.new( 'filename.py' ) { "" }
    assert_equal(
      [
        [ [ "# Comment 1" ], [ "def codeblock" ] ],
        [ [ "# Comment 2" ], [ "end" ] ]
      ],
      r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
      "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
    )
  end

  # For a `.sh` file the same encoding-looking comment is kept as
  # documentation: only the shebang is dropped.
  def test_encoding_in_notpython
    r = Rocco.new( 'filename.sh' ) { "" }
    assert_equal(
      [
        [ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ],
        [ [ "# Comment 2" ], [ "end" ] ]
      ],
      r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
      "Strings matching the PEP 263 encoding definition regex shouldn't be stripped from documents that are neither python nor ruby."
    )
  end
end
class RoccoLanguageDetection < Test::Unit::TestCase class RoccoLanguageDetection < Test::Unit::TestCase
def test_basic_detection def test_basic_detection
r = Rocco.new( 'filename.py' ) { "" } r = Rocco.new( 'filename.py' ) { "" }