Skipping Python/Ruby 1.9 source encoding
In the same way that it makes sense to skip the shebang (#!) line in scripts, it makes sense to skip the encoding definition in Python files (described by [PEP 263][p]) and in Ruby 1.9 files (whose syntax is similar enough that it's not worth handling separately).

[p]: http://www.python.org/dev/peps/pep-0263/
This commit is contained in:
parent
a4d0e41413
commit
e506c5172a
@ -209,7 +209,7 @@ class Rocco
|
|||||||
"js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
"js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } },
|
||||||
"lua" => { :single => "--", :multi => nil },
|
"lua" => { :single => "--", :multi => nil },
|
||||||
"python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } },
|
"python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } },
|
||||||
"ruby" => { :single => "#", :multi => nil },
|
"rb" => { :single => "#", :multi => nil },
|
||||||
"scheme" => { :single => ";;", :multi => nil },
|
"scheme" => { :single => ";;", :multi => nil },
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -227,12 +227,14 @@ class Rocco
|
|||||||
# Parse the raw file data into a list of two-tuples. Each tuple has the
|
# Parse the raw file data into a list of two-tuples. Each tuple has the
|
||||||
# form `[docs, code]` where both elements are arrays containing the
|
# form `[docs, code]` where both elements are arrays containing the
|
||||||
# raw lines parsed from the input file. The first line is ignored if it
|
# raw lines parsed from the input file. The first line is ignored if it
|
||||||
# is a shebang line.
|
# is a shebang line. We also ignore the PEP 263 encoding information in
|
||||||
|
# python sourcefiles, and the similar ruby 1.9 syntax.
|
||||||
def parse(data)
|
def parse(data)
|
||||||
sections = []
|
sections = []
|
||||||
docs, code = [], []
|
docs, code = [], []
|
||||||
lines = data.split("\n")
|
lines = data.split("\n")
|
||||||
lines.shift if lines[0] =~ /^\#\!/
|
lines.shift if lines[0] =~ /^\#\!/
|
||||||
|
lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? @options[:language]
|
||||||
lines.each do |line|
|
lines.each do |line|
|
||||||
case line
|
case line
|
||||||
when @comment_pattern
|
when @comment_pattern
|
||||||
|
@ -82,6 +82,63 @@ class RoccoBasicTests < Test::Unit::TestCase
|
|||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
class RoccoSkippableLines < Test::Unit::TestCase
|
||||||
|
def test_shebang_first_line
|
||||||
|
r = Rocco.new( 'filename.sh' ) { "" }
|
||||||
|
assert_equal(
|
||||||
|
[
|
||||||
|
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||||
|
[ [ "# Comment 2" ], [ "end" ] ]
|
||||||
|
],
|
||||||
|
r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||||
|
"Shebang should be stripped when it appears as the first line."
|
||||||
|
)
|
||||||
|
end
|
||||||
|
def test_shebang_in_content
|
||||||
|
r = Rocco.new( 'filename.sh' ) { "" }
|
||||||
|
assert_equal(
|
||||||
|
[
|
||||||
|
[ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ],
|
||||||
|
[ [ "# Comment 2" ], [ "end" ] ]
|
||||||
|
],
|
||||||
|
r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ),
|
||||||
|
"Shebang shouldn't be stripped anywhere other than as the first line."
|
||||||
|
)
|
||||||
|
end
|
||||||
|
def test_encoding_in_ruby
|
||||||
|
r = Rocco.new( 'filename.rb' ) { "" }
|
||||||
|
assert_equal(
|
||||||
|
[
|
||||||
|
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||||
|
[ [ "# Comment 2" ], [ "end" ] ]
|
||||||
|
],
|
||||||
|
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||||
|
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||||
|
)
|
||||||
|
end
|
||||||
|
def test_encoding_in_python
|
||||||
|
r = Rocco.new( 'filename.py' ) { "" }
|
||||||
|
assert_equal(
|
||||||
|
[
|
||||||
|
[ [ "# Comment 1" ], [ "def codeblock" ] ],
|
||||||
|
[ [ "# Comment 2" ], [ "end" ] ]
|
||||||
|
],
|
||||||
|
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||||
|
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||||
|
)
|
||||||
|
end
|
||||||
|
def test_encoding_in_notpython
|
||||||
|
r = Rocco.new( 'filename.sh' ) { "" }
|
||||||
|
assert_equal(
|
||||||
|
[
|
||||||
|
[ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ],
|
||||||
|
[ [ "# Comment 2" ], [ "end" ] ]
|
||||||
|
],
|
||||||
|
r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ),
|
||||||
|
"Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document."
|
||||||
|
)
|
||||||
|
end
|
||||||
|
end
|
||||||
class RoccoLanguageDetection < Test::Unit::TestCase
|
class RoccoLanguageDetection < Test::Unit::TestCase
|
||||||
def test_basic_detection
|
def test_basic_detection
|
||||||
r = Rocco.new( 'filename.py' ) { "" }
|
r = Rocco.new( 'filename.py' ) { "" }
|
||||||
|
Loading…
Reference in New Issue
Block a user