UTF-8 encoding/decoding for Ruby 1.9.
This commit is contained in:
parent
de59adecee
commit
5793ce986b
15
README.rdoc
15
README.rdoc
|
@ -49,6 +49,21 @@ be running, of course.
|
|||
See also the test code, especially tests/test_db_api.rb.
|
||||
|
||||
|
||||
= Notes
|
||||
|
||||
== String Encoding
|
||||
|
||||
The BSON ("Binary JSON") format used to communicate with Mongo requires that
|
||||
strings be UTF-8 (http://en.wikipedia.org/wiki/UTF-8).
|
||||
|
||||
Ruby 1.9 has built-in character encoding support. Strings sent to Mongo are
|
||||
converted to UTF-8 when necessary, and strings read from Mongo have their
|
||||
character encoding set to UTF-8 (the bytes themselves are not changed).
|
||||
|
||||
When used with Ruby 1.8, the bytes in each string are written to and read from
|
||||
Mongo as-is. If the string is ASCII, all is well, because ASCII is a subset of
|
||||
UTF-8. If the string is not ASCII, it may not be a well-formed UTF-8 string.
|
||||
|
||||
= Testing
|
||||
|
||||
If you have the source code, you can run the tests.
|
||||
|
|
|
@ -46,8 +46,18 @@ class BSON
|
|||
NUMBER_INT = 16
|
||||
MAXKEY = 127
|
||||
|
||||
# Defines to_utf8 once, when this file is loaded, choosing the implementation
# from the running interpreter's version.
# NOTE(review): RUBY_VERSION is compared as a String, i.e. lexicographically.
if RUBY_VERSION >= '1.9'
|
||||
# Ruby 1.9 and later: returns str transcoded to UTF-8 via String#encode.
def self.to_utf8(str)
|
||||
str.encode("utf-8")
|
||||
end
|
||||
else
|
||||
# Earlier Rubies: returns str as-is; no conversion or validation is attempted.
def self.to_utf8(str)
|
||||
str # TODO punt for now
|
||||
end
|
||||
end
|
||||
|
||||
def self.serialize_cstr(buf, val)
|
||||
buf.put_array(val.to_s.unpack("C*") + [0])
|
||||
buf.put_array(to_utf8(val.to_s).unpack("C*") + [0])
|
||||
end
|
||||
|
||||
def initialize(db=nil)
|
||||
|
@ -173,6 +183,7 @@ class BSON
|
|||
doc
|
||||
end
|
||||
|
||||
# For debugging.
|
||||
def hex_dump
|
||||
str = ''
|
||||
@buf.to_a.each_with_index { |b,i|
|
||||
|
@ -231,7 +242,11 @@ class BSON
|
|||
def deserialize_string_data(buf)
|
||||
len = buf.get_int
|
||||
bytes = buf.get(len)
|
||||
bytes[0..-2].pack("C*")
|
||||
str = bytes[0..-2].pack("C*")
|
||||
if RUBY_VERSION >= '1.9'
|
||||
str.force_encoding("utf-8")
|
||||
end
|
||||
str
|
||||
end
|
||||
|
||||
def deserialize_oid_data(buf)
|
||||
|
@ -385,6 +400,9 @@ class BSON
|
|||
break if b == 0
|
||||
chars << b.chr
|
||||
end
|
||||
if RUBY_VERSION >= '1.9'
|
||||
chars.force_encoding("utf-8") # Mongo stores UTF-8
|
||||
end
|
||||
chars
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue