From e506c5172a81883b7f01a9b3de4daad222cf5e72 Mon Sep 17 00:00:00 2001 From: Mike West Date: Thu, 21 Oct 2010 13:45:47 +0200 Subject: [PATCH] Skipping Python/Ruby 1.9 source encoding In the same way that it makes sense to skip the shebang (#!) line in scripts, it makes sense to skip the encoding definition in Python files (described by [PEP 263][p]) and Ruby 1.9 files (similar enough syntax that it's not worth worrying about). [p]: http://www.python.org/dev/peps/pep-0263/ --- lib/rocco.rb | 6 +++-- test/rocco_test.rb | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/lib/rocco.rb b/lib/rocco.rb index 30db0f7..0b2cbb6 100644 --- a/lib/rocco.rb +++ b/lib/rocco.rb @@ -209,7 +209,7 @@ class Rocco "js" => { :single => "//", :multi => { :start => "/**", :middle => "*", :end => "*/" } }, "lua" => { :single => "--", :multi => nil }, "python" => { :single => "#", :multi => { :start => '"""', :middle => nil, :end => '"""' } }, - "ruby" => { :single => "#", :multi => nil }, + "rb" => { :single => "#", :multi => nil }, "scheme" => { :single => ";;", :multi => nil }, } @@ -227,12 +227,14 @@ class Rocco # Parse the raw file data into a list of two-tuples. Each tuple has the # form `[docs, code]` where both elements are arrays containing the # raw lines parsed from the input file. The first line is ignored if it - # is a shebang line. + # is a shebang line. We also ignore the PEP 263 encoding information in + # python sourcefiles, and the similar ruby 1.9 syntax. def parse(data) sections = [] docs, code = [], [] lines = data.split("\n") lines.shift if lines[0] =~ /^\#\!/ + lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ and [ "python", "rb" ].include? 
@options[:language] lines.each do |line| case line when @comment_pattern diff --git a/test/rocco_test.rb b/test/rocco_test.rb index 58597a8..58fcc3b 100644 --- a/test/rocco_test.rb +++ b/test/rocco_test.rb @@ -82,6 +82,63 @@ class RoccoBasicTests < Test::Unit::TestCase end +class RoccoSkippableLines < Test::Unit::TestCase + def test_shebang_first_line + r = Rocco.new( 'filename.sh' ) { "" } + assert_equal( + [ + [ [ "# Comment 1" ], [ "def codeblock" ] ], + [ [ "# Comment 2" ], [ "end" ] ] + ], + r.parse( "#!/usr/bin/env bash\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ), + "Shebang should be stripped when it appears as the first line." + ) + end + def test_shebang_in_content + r = Rocco.new( 'filename.sh' ) { "" } + assert_equal( + [ + [ [ "# Comment 1", "#!/usr/bin/env bash" ], [ "def codeblock" ] ], + [ [ "# Comment 2" ], [ "end" ] ] + ], + r.parse( "# Comment 1\n#!/usr/bin/env bash\ndef codeblock\n# Comment 2\nend\n" ), + "Shebang shouldn't be stripped anywhere other than as the first line." + ) + end + def test_encoding_in_ruby + r = Rocco.new( 'filename.rb' ) { "" } + assert_equal( + [ + [ [ "# Comment 1" ], [ "def codeblock" ] ], + [ [ "# Comment 2" ], [ "end" ] ] + ], + r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ), + "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document." + ) + end + def test_encoding_in_python + r = Rocco.new( 'filename.py' ) { "" } + assert_equal( + [ + [ [ "# Comment 1" ], [ "def codeblock" ] ], + [ [ "# Comment 2" ], [ "end" ] ] + ], + r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ), + "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document." 
+ ) + end + def test_encoding_in_notpython + r = Rocco.new( 'filename.sh' ) { "" } + assert_equal( + [ + [ [ "# encoding: utf-8", "# Comment 1" ], [ "def codeblock" ] ], + [ [ "# Comment 2" ], [ "end" ] ] + ], + r.parse( "#!/usr/bin/env bash\n# encoding: utf-8\n# Comment 1\ndef codeblock\n# Comment 2\nend\n" ), + "Strings matching the PEP 263 encoding definition regex should be stripped when they appear at the top of a python document." + ) + end +end class RoccoLanguageDetection < Test::Unit::TestCase def test_basic_detection r = Rocco.new( 'filename.py' ) { "" }