Start cleaning things up to read real XMP files

This commit is contained in:
John Bintz 2011-03-18 12:28:53 -04:00
parent b374a40752
commit 8f9141bece
15 changed files with 300 additions and 32 deletions

7
bin/avm2avm Executable file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env ruby
require 'thor'
require 'avm/cli'
# Hand control to the Thor-based command line interface. AVM::CLI declares
# `convert` as its default task, so that task runs when none is named.
AVM::CLI.start

17
lib/avm/cli.rb Normal file
View File

@ -0,0 +1,17 @@
require 'thor'
require 'avm/image'
require 'pp'
module AVM
  # Command line front end built on Thor. Its only task, `convert`, is also
  # the default task.
  class CLI < ::Thor
    default_task :convert

    desc 'convert', "Convert a file from one format to another"
    # Reads an entire XML document from standard input, builds an AVM::Image
    # from it and pretty-prints the image's hash representation.
    def convert
      image = AVM::Image.from_xml($stdin.read)
      pp image.to_h
    end
  end
end

View File

@ -75,12 +75,12 @@ module AVM
def from_xml(image, document) def from_xml(image, document)
contacts = [] contacts = []
document.get_refs do |refs| document.get_refs do |refs|
refs[:dublin_core].search('.//rdf:li').each do |name| refs[:dublin_core].search('.//dc:creator//rdf:li').each do |name|
contacts << { :name => name.text } contacts << { :name => name.text.strip }
end end
IPTC_MULTI_FIELD_MAP.each do |key, element_name| IPTC_MULTI_FIELD_MAP.each do |key, element_name|
if node = refs[:iptc].at_xpath("//Iptc4xmpCore:#{element_name}") if node = refs[:iptc].at_xpath(".//Iptc4xmpCore:#{element_name}")
node.text.split(',').collect(&:strip).each_with_index do |value, index| node.text.split(',').collect(&:strip).each_with_index do |value, index|
contacts[index][key] = value contacts[index][key] = value
end end

View File

@ -11,12 +11,16 @@ module AVM
class Image class Image
DUBLIN_CORE_FIELDS = [ :title, :description ] DUBLIN_CORE_FIELDS = [ :title, :description ]
PHOTOSHOP_SINGLES = {
'Headline' => :headline,
'DateCreated' => :date
}
AVM_SINGLE_FIELDS = [ AVM_SINGLE_FIELDS = [
'Distance.Notes', 'Distance.Notes',
'Spectral.Notes', 'Spectral.Notes',
'ReferenceURL', 'ReferenceURL',
'Credit', 'Credit',
'Date',
'ID', 'ID',
'Type', 'Type',
'Image.ProductQuality', 'Image.ProductQuality',
@ -40,7 +44,6 @@ module AVM
:spectral_notes, :spectral_notes,
:reference_url, :reference_url,
:credit, :credit,
:date,
:id, :id,
:type, :type,
:quality, :quality,
@ -64,7 +67,6 @@ module AVM
:spectral_notes, :spectral_notes,
:reference_url, :reference_url,
:credit, :credit,
:string_date,
:id, :id,
:image_type, :image_type,
:image_quality, :image_quality,
@ -141,6 +143,7 @@ module AVM
end end
refs[:photoshop].add_child(%{<photoshop:Headline>#{headline}</photoshop:Headline>}) refs[:photoshop].add_child(%{<photoshop:Headline>#{headline}</photoshop:Headline>})
refs[:photoshop].add_child(%{<photoshop:DateCreated>#{string_date}</photoshop:DateCreated>})
AVM_SINGLES_FOR_MESSAGES.each do |tag, message| AVM_SINGLES_FOR_MESSAGES.each do |tag, message|
if value = send(message) if value = send(message)
@ -229,14 +232,18 @@ module AVM
end end
end end
if node = refs[:photoshop].at_xpath('./photoshop:Headline') PHOTOSHOP_SINGLES.each do |tag, field|
options[:headline] = node.text if node = refs[:photoshop].at_xpath("./photoshop:#{tag}")
options[field] = node.text
end
end end
if node = refs[:avm].at_xpath('./avm:Distance') if node = refs[:avm].at_xpath('./avm:Distance')
list_values = node.search('.//rdf:li').collect { |li| li.text } list_values = node.search('.//rdf:li').collect { |li| li.text }
case list_values.length case list_values.length
when 0
options[:light_years] = node.text
when 1 when 1
options[:light_years] = list_values.first options[:light_years] = list_values.first
when 2 when 2

27
lib/avm/node.rb Normal file
View File

@ -0,0 +1,27 @@
require 'delegate'
module AVM
  # Delegating wrapper around a Nokogiri node that remembers the XMP object it
  # belongs to, so XPath lookups are always made with that object's namespace
  # bindings. Every other message is forwarded to the wrapped node.
  class Node < DelegateClass(Nokogiri::XML::Node)
    # Wraps each member of +nodeset+ in a Node bound to +xmp+.
    #
    # Returns an Array of Node objects.
    def self.from_nodeset(xmp, nodeset)
      nodeset.map { |member| new(xmp, member) }
    end

    # xmp  - object responding to #namespaces (the owning XMP document).
    # node - the node to wrap and delegate to.
    def initialize(xmp, node)
      @xmp = xmp
      @node = node
      super(node)
    end

    # Finds the first node matching +path+ relative to the wrapped node.
    #
    # Returns a wrapped Node, or nil when nothing matches.
    def at_xpath(path)
      found = @node.at_xpath(path, @xmp.namespaces)
      return nil unless found

      self.class.new(@xmp, found)
    end

    # Finds every node matching +path+ relative to the wrapped node.
    #
    # Returns an Array of wrapped Node objects.
    def search(path)
      matches = @node.search(path, @xmp.namespaces)
      self.class.from_nodeset(@xmp, matches)
    end
  end
end

View File

@ -1,4 +1,5 @@
require 'nokogiri' require 'nokogiri'
require 'avm/node'
module AVM module AVM
class XMP class XMP
@ -9,6 +10,15 @@ module AVM
'avm' => 'AVM' 'avm' => 'AVM'
} }
REQUIRED_NAMESPACES = {
:x => "adobe:ns:meta/",
:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
:dc => "http://purl.org/dc/elements/1.1/",
:photoshop => "http://ns.adobe.com/photoshop/1.0/",
:avm => "http://www.communicatingastronomy.org/avm/1.0/",
:Iptc4xmpCore => "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"
}
attr_reader :doc attr_reader :doc
def initialize(doc = nil) def initialize(doc = nil)
@ -25,37 +35,73 @@ module AVM
new(Nokogiri::XML(string)) new(Nokogiri::XML(string))
end end
# Rewrites the canonical namespace prefixes (the REQUIRED_NAMESPACES keys such
# as rdf:, dc:, avm:) to the prefixes the loaded document actually declares,
# as reported by prefix_map. Prefixes without a mapping are left untouched.
#
# A prefix is recognised at the very start of the string or when it follows
# `<`, `/`, `@` or whitespace, so XPath steps ('.//rdf:li'), bare qualified
# names ('rdf:about') and attributes inside an XML fragment
# ('<rdf:Description rdf:about="x" />') are all translated. Previously only
# the `<`, `/` and `@` cases matched, which left bare names and attributes in
# fragments carrying the canonical prefix.
#
# string - XPath expression or XML fragment written with canonical prefixes.
#
# Returns a new String; the argument itself is not modified.
def ensure_xmlns(string)
  string.gsub(%r{(\A|[</@\s])(\w+):}) do
    # Capture the match groups before calling anything else.
    lead = Regexp.last_match(1)
    prefix = Regexp.last_match(2)
    "#{lead}#{prefix_map[prefix] || prefix}:"
  end
end
alias :% :ensure_xmlns
# Prepares the argument list for a namespace-aware XPath query.
#
# path - XPath expression written with the canonical prefixes.
#
# Returns a two-element Array: the path after ensure_xmlns has translated its
# prefixes, followed by the namespaces hash.
def ensure_xpath(path)
  translated = ensure_xmlns(path)
  [translated, namespaces]
end
# Runs a namespace-aware search for every node matching +path+.
#
# path - XPath expression written with the canonical prefixes.
# node - object to search from; defaults to the document.
#
# Returns whatever node.search returns for the translated path.
def search(path, node = doc)
  xpath, bindings = ensure_xpath(path)
  node.search(xpath, bindings)
end
# Runs a namespace-aware lookup for the first node matching +path+.
#
# path - XPath expression written with the canonical prefixes.
# node - object to query; defaults to the document.
#
# Returns whatever node.at_xpath returns for the translated path.
def at_xpath(path, node = doc)
  xpath, bindings = ensure_xpath(path)
  node.at_xpath(xpath, bindings)
end
# Namespace declarations collected from the underlying document.
#
# The result of collect_namespaces is computed on first use and cached in
# @namespaces for later calls.
def namespaces
  return @namespaces if @namespaces

  @namespaces = doc.document.collect_namespaces
end
private private
# Maps each canonical prefix (a REQUIRED_NAMESPACES key, as a String) to the
# prefix the document really uses for the same namespace URL.
#
# Declarations come from collect_namespaces, whose keys have any 'xmlns:'
# text removed to obtain the in-document prefix. Declarations whose URL is not
# in REQUIRED_NAMESPACES are ignored. The resulting Hash is cached in
# @prefix_map.
def prefix_map
  return @prefix_map if @prefix_map

  mapping = {}
  doc.document.collect_namespaces.each do |declaration, url|
    in_use = declaration.gsub('xmlns:', '')

    # Find the canonical prefix registered for this URL, if any.
    canonical = nil
    REQUIRED_NAMESPACES.each do |name, required_url|
      canonical = name.to_s if url == required_url
    end

    mapping[canonical] = in_use if canonical
  end
  @prefix_map = mapping
end
def ensure_namespaces! def ensure_namespaces!
{ existing = doc.document.collect_namespaces
:x => "adobe:ns:meta/",
:rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", REQUIRED_NAMESPACES.each do |namespace, url|
:dc => "http://purl.org/dc/elements/1.1/", doc.root.add_namespace_definition(namespace.to_s, url) if !existing.values.include?(url)
:photoshop => "http://ns.adobe.com/photoshop/1.0/",
:avm => "http://www.communicatingastronomy.org/avm/1.0/",
:Iptc4xmpCore => "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"
}.each do |namespace, url|
doc.root.add_namespace_definition(namespace.to_s, url)
end end
end end
def ensure_descriptions_findable! def ensure_descriptions_findable!
added = [] added = []
doc.search('//rdf:Description').each do |description| search('//rdf:Description').each do |description|
if first_child = description.first_element_child if first_child = description.first_element_child
if first_child.namespace if first_child.namespace
prefix = first_child.namespace.prefix prefix = first_child.namespace.prefix
description['rdf:about'] = PREFIXES[prefix] if prefix_description = PREFIXES[prefix_map.index(prefix)]
added << prefix description[self % 'rdf:about'] = prefix_description
added << prefix
end
end end
end end
end end
if !at_xpath('//rdf:RDF')
doc.first_element_child.add_child(self % '<rdf:RDF />')
end
PREFIXES.each do |prefix, about| PREFIXES.each do |prefix, about|
if !added.include?(prefix) if !added.include?(prefix)
doc.at_xpath('//rdf:RDF').add_child(%{<rdf:Description rdf:about="#{about}" />}) at_xpath('//rdf:RDF').add_child(self % %{<rdf:Description rdf:about="#{about}" />})
end end
end end
end end
@ -77,7 +123,7 @@ module AVM
end end
def at_rdf_description(about) def at_rdf_description(about)
@doc.at_xpath(%{//rdf:Description[@rdf:about="#{about}"]}) AVM::Node.new(self, at_xpath(%{//rdf:Description[@rdf:about="#{about}"]}))
end end
def empty_xml_doc def empty_xml_doc

View File

@ -23,4 +23,5 @@ Gem::Specification.new do |s|
s.add_development_dependency 'mocha' s.add_development_dependency 'mocha'
s.add_dependency 'nokogiri' s.add_dependency 'nokogiri'
s.add_dependency 'thor'
end end

0
spec/avm/cli_spec.rb Normal file
View File

View File

@ -157,6 +157,15 @@ describe AVM::Image do
end end
context "image in it" do context "image in it" do
context 'distance as a single value, assume light years' do
let(:file_path) { 'spec/sample_files/image/single_value_light_years.xmp' }
has_most_options
its(:redshift) { should be_nil }
its(:light_years) { should == light_years }
end
context "distance in light years" do context "distance in light years" do
let(:file_path) { 'spec/sample_files/image/light_years.xmp' } let(:file_path) { 'spec/sample_files/image/light_years.xmp' }
@ -197,7 +206,7 @@ describe AVM::Image do
it "should have basic tags" do it "should have basic tags" do
xml.at_xpath('//rdf:RDF').should_not be_nil xml.at_xpath('//rdf:RDF').should_not be_nil
xml.search('//rdf:RDF/rdf:Description').should_not be_empty xml.search('//rdf:RDF/rdf:Description').should_not be_empty
avm.at_xpath('./avm:Date').should be_nil photoshop.at_xpath('./photoshop:DateCreated').should_not be_nil
end end
end end
@ -221,7 +230,7 @@ describe AVM::Image do
xpath_text(avm, './avm:Spectral.Notes').should == spectral_notes xpath_text(avm, './avm:Spectral.Notes').should == spectral_notes
xpath_text(avm, './avm:ReferenceURL').should == reference_url xpath_text(avm, './avm:ReferenceURL').should == reference_url
xpath_text(avm, './avm:Credit').should == credit xpath_text(avm, './avm:Credit').should == credit
xpath_text(avm, './avm:Date').should == date xpath_text(photoshop, './photoshop:DateCreated').should == date
xpath_text(avm, './avm:ID').should == id xpath_text(avm, './avm:ID').should == id
xpath_text(avm, './avm:Type').should == type xpath_text(avm, './avm:Type').should == type
xpath_text(avm, './avm:Image.ProductQuality').should == image_quality xpath_text(avm, './avm:Image.ProductQuality').should == image_quality

View File

@ -24,11 +24,63 @@ describe AVM::XMP do
end end
end end
describe '.from_string' do describe 'xml from string' do
let(:xmp) { self.class.describes.from_string(string) } let(:xmp) { self.class.describes.from_string(string) }
let(:string) { '<xml xmlns:rdf="cats"><rdf:RDF><node /></rdf:RDF></xml>' } let(:doc) { xmp.doc }
specify { xmp.doc.at_xpath('//node').should_not be_nil } describe '.from_string' do
let(:string) { '<xml xmlns:rdf="cats"><rdf:RDF><node /></rdf:RDF></xml>' }
specify { xmp.doc.at_xpath('//node').should_not be_nil }
end
describe '#ensure_namespaces! and #ensure_xmlns' do
let(:rdf_namespace) { AVM::XMP::REQUIRED_NAMESPACES[:rdf] }
def self.all_default_namespaces
it "should have all the namespaces with the default prefixes" do
namespaces = doc.document.collect_namespaces
namespaces_to_test = AVM::XMP::REQUIRED_NAMESPACES.dup
yield namespaces_to_test if block_given?
namespaces_to_test.each do |prefix, namespace|
if namespace
namespaces["xmlns:#{prefix}"].should == namespace
end
end
end
end
before { doc }
context 'none of the namespaces exist' do
let(:string) { '<xml><node /></xml>' }
all_default_namespaces
specify { xmp.ensure_xmlns('.//rdf:what').should == './/rdf:what' }
end
context 'one namespace exists with the same prefix' do
let(:string) { %{<xml xmlns:rdf="#{rdf_namespace}"><node /></xml>} }
all_default_namespaces
specify { xmp.ensure_xmlns('.//rdf:what').should == './/rdf:what' }
end
context 'one namespace exists with a different prefix' do
let(:string) { %{<xml xmlns:r="#{rdf_namespace}"><node /></xml>} }
all_default_namespaces { |namespaces|
namespaces.delete(:rdf)
namespaces[:r] = AVM::XMP::REQUIRED_NAMESPACES[:rdf]
}
specify { xmp.ensure_xmlns('.//rdf:what').should == './/r:what' }
end
end
end end
describe '#ensure_descriptions_findable!' do describe '#ensure_descriptions_findable!' do
@ -46,7 +98,7 @@ describe AVM::XMP do
let(:content) { '' } let(:content) { '' }
[ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which| [ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which|
specify { xmp.doc.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).children.should be_empty } specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).children.should be_empty }
end end
end end
@ -67,8 +119,28 @@ describe AVM::XMP do
XML XML
[ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which| [ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which|
specify { xmp.doc.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).should_not be_nil } specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).should_not be_nil }
end end
end end
context 'has a namespace it should know about with a different prefix' do
let(:content) { <<-XML }
<rdf:Description rdf:about="" xmlns:whatever="http://purl.org/dc/elements/1.1/">
<whatever:creator />
</rdf:Description>
XML
specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="Dublin Core"]}).should_not be_nil }
end
context 'has a namespace it knows nothing about' do
let(:content) { <<-XML }
<rdf:Description rdf:about="" xmlns:whatever="http://example.com">
<whatever:creator />
</rdf:Description>
XML
it { expect { xmp }.to_not raise_error }
end
end end
end end

View File

@ -5,6 +5,11 @@
<dc:creator> <dc:creator>
<rdf:Seq><rdf:li>John Bintz</rdf:li></rdf:Seq> <rdf:Seq><rdf:li>John Bintz</rdf:li></rdf:Seq>
</dc:creator> </dc:creator>
<dc:title>
<rdf:Alt>
<rdf:li xml:lang="x-default">This is an image</rdf:li>
</rdf:Alt>
</dc:title>
</rdf:Description> </rdf:Description>
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/"> xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/">

View File

@ -22,6 +22,7 @@
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/"> xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/">
<photoshop:Headline>Headline</photoshop:Headline> <photoshop:Headline>Headline</photoshop:Headline>
<photoshop:DateCreated>2010-01-01</photoshop:DateCreated>
</rdf:Description> </rdf:Description>
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/"> xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/">
@ -29,7 +30,6 @@
<avm:Distance.Notes>Distance Notes</avm:Distance.Notes> <avm:Distance.Notes>Distance Notes</avm:Distance.Notes>
<avm:ReferenceURL>Reference URL</avm:ReferenceURL> <avm:ReferenceURL>Reference URL</avm:ReferenceURL>
<avm:Credit>Credit</avm:Credit> <avm:Credit>Credit</avm:Credit>
<avm:Date>2010-01-01</avm:Date>
<avm:ID>ID</avm:ID> <avm:ID>ID</avm:ID>
<avm:Type>Observation</avm:Type> <avm:Type>Observation</avm:Type>
<avm:Image.ProductQuality>Good</avm:Image.ProductQuality> <avm:Image.ProductQuality>Good</avm:Image.ProductQuality>

View File

@ -21,6 +21,7 @@
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/"> xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/">
<photoshop:Headline>Headline</photoshop:Headline> <photoshop:Headline>Headline</photoshop:Headline>
<photoshop:DateCreated>2010-01-01</photoshop:DateCreated>
</rdf:Description> </rdf:Description>
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/"> xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/">
@ -28,7 +29,6 @@
<avm:Distance.Notes>Distance Notes</avm:Distance.Notes> <avm:Distance.Notes>Distance Notes</avm:Distance.Notes>
<avm:ReferenceURL>Reference URL</avm:ReferenceURL> <avm:ReferenceURL>Reference URL</avm:ReferenceURL>
<avm:Credit>Credit</avm:Credit> <avm:Credit>Credit</avm:Credit>
<avm:Date>2010-01-01</avm:Date>
<avm:ID>ID</avm:ID> <avm:ID>ID</avm:ID>
<avm:Type>Observation</avm:Type> <avm:Type>Observation</avm:Type>
<avm:Image.ProductQuality>Good</avm:Image.ProductQuality> <avm:Image.ProductQuality>Good</avm:Image.ProductQuality>

View File

@ -22,6 +22,7 @@
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/"> xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/">
<photoshop:Headline>Headline</photoshop:Headline> <photoshop:Headline>Headline</photoshop:Headline>
<photoshop:DateCreated>2010-01-01</photoshop:DateCreated>
</rdf:Description> </rdf:Description>
<rdf:Description rdf:about="" <rdf:Description rdf:about=""
xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/"> xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/">
@ -29,7 +30,6 @@
<avm:Distance.Notes>Distance Notes</avm:Distance.Notes> <avm:Distance.Notes>Distance Notes</avm:Distance.Notes>
<avm:ReferenceURL>Reference URL</avm:ReferenceURL> <avm:ReferenceURL>Reference URL</avm:ReferenceURL>
<avm:Credit>Credit</avm:Credit> <avm:Credit>Credit</avm:Credit>
<avm:Date>2010-01-01</avm:Date>
<avm:ID>ID</avm:ID> <avm:ID>ID</avm:ID>
<avm:Type>Observation</avm:Type> <avm:Type>Observation</avm:Type>
<avm:Image.ProductQuality>Good</avm:Image.ProductQuality> <avm:Image.ProductQuality>Good</avm:Image.ProductQuality>

View File

@ -0,0 +1,77 @@
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 4.2.2-c063 53.352624, 2008/07/30-18:05:41 ">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title>:
<rdf:Alt>
<rdf:li xml:lang="x-default">My title</rdf:li>
</rdf:Alt>
</dc:title>
<dc:description>
<rdf:Alt>
<rdf:li xml:lang="x-default">Description</rdf:li>
</rdf:Alt>
</dc:description>
</rdf:Description>
<rdf:Description rdf:about=""
xmlns:Iptc4xmpCore="http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/">
<Iptc4xmpCore:CreatorContactInfo rdf:parseType="Resource">
</Iptc4xmpCore:CreatorContactInfo>
</rdf:Description>
<rdf:Description rdf:about=""
xmlns:photoshop="http://ns.adobe.com/photoshop/1.0/">
<photoshop:Headline>Headline</photoshop:Headline>
<photoshop:DateCreated>2010-01-01</photoshop:DateCreated>
</rdf:Description>
<rdf:Description rdf:about=""
xmlns:avm="http://www.communicatingastronomy.org/avm/1.0/">
<avm:Spectral.Notes>Spectral Notes</avm:Spectral.Notes>
<avm:Distance.Notes>Distance Notes</avm:Distance.Notes>
<avm:ReferenceURL>Reference URL</avm:ReferenceURL>
<avm:Credit>Credit</avm:Credit>
<avm:ID>ID</avm:ID>
<avm:Type>Observation</avm:Type>
<avm:Image.ProductQuality>Good</avm:Image.ProductQuality>
<avm:Distance>Light years</avm:Distance>
<avm:Spatial.CoordinateFrame>ICRS</avm:Spatial.CoordinateFrame>
<avm:Spatial.Equinox>100</avm:Spatial.Equinox>
<avm:Spatial.ReferenceValue>
<rdf:Seq>
<rdf:li>100</rdf:li>
<rdf:li>50</rdf:li>
</rdf:Seq>
</avm:Spatial.ReferenceValue>
<avm:Spatial.ReferenceDimension>
<rdf:Seq>
<rdf:li>200</rdf:li>
<rdf:li>150</rdf:li>
</rdf:Seq>
</avm:Spatial.ReferenceDimension>
<avm:Spatial.ReferencePixel>
<rdf:Seq>
<rdf:li>25</rdf:li>
<rdf:li>15</rdf:li>
</rdf:Seq>
</avm:Spatial.ReferencePixel>
<avm:Spatial.Scale>
<rdf:Seq>
<rdf:li>40</rdf:li>
<rdf:li>35</rdf:li>
</rdf:Seq>
</avm:Spatial.Scale>
<avm:Spatial.Rotation>10</avm:Spatial.Rotation>
<avm:Spatial.CoordsystemProjection>TAN</avm:Spatial.CoordsystemProjection>
<avm:Spatial.Quality>Full</avm:Spatial.Quality>
<avm:Spatial.Notes>Spatial Notes</avm:Spatial.Notes>
<avm:Spatial.FITSheader>FITS header</avm:Spatial.FITSheader>
<avm:Spatial.CDMatrix>
<rdf:Seq>
<rdf:li>1</rdf:li>
<rdf:li>2</rdf:li>
<rdf:li>3</rdf:li>
<rdf:li>4</rdf:li>
</rdf:Seq>
</avm:Spatial.CDMatrix>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>