From 8f9141bece0de99da3aa174d83574f615066d382 Mon Sep 17 00:00:00 2001 From: John Bintz Date: Fri, 18 Mar 2011 12:28:53 -0400 Subject: [PATCH] starting to clean things up to read real xmp files --- bin/avm2avm | 7 ++ lib/avm/cli.rb | 17 ++++ lib/avm/creator.rb | 6 +- lib/avm/image.rb | 17 ++-- lib/avm/node.rb | 27 ++++++ lib/avm/xmp.rb | 74 +++++++++++++---- ruby-avm-library.gemspec | 1 + spec/avm/cli_spec.rb | 0 spec/avm/image_spec.rb | 13 ++- spec/avm/xmp_spec.rb | 82 +++++++++++++++++-- spec/sample_files/creator/one_creator.xmp | 5 ++ spec/sample_files/image/both.xmp | 2 +- spec/sample_files/image/light_years.xmp | 2 +- spec/sample_files/image/redshift.xmp | 2 +- .../image/single_value_light_years.xmp | 77 +++++++++++++++++ 15 files changed, 300 insertions(+), 32 deletions(-) create mode 100755 bin/avm2avm create mode 100644 lib/avm/cli.rb create mode 100644 lib/avm/node.rb create mode 100644 spec/avm/cli_spec.rb create mode 100644 spec/sample_files/image/single_value_light_years.xmp diff --git a/bin/avm2avm b/bin/avm2avm new file mode 100755 index 0000000..70ca7f6 --- /dev/null +++ b/bin/avm2avm @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby + +require 'thor' +require 'avm/cli' + +AVM::CLI.start + diff --git a/lib/avm/cli.rb b/lib/avm/cli.rb new file mode 100644 index 0000000..bf18cee --- /dev/null +++ b/lib/avm/cli.rb @@ -0,0 +1,17 @@ +require 'thor' +require 'avm/image' +require 'pp' + +module AVM + class CLI < ::Thor + default_task :convert + + desc 'convert', "Convert a file from one format to another" + def convert + data = $stdin.read + + pp AVM::Image.from_xml(data).to_h + end + end +end + diff --git a/lib/avm/creator.rb b/lib/avm/creator.rb index 52b64a8..cadea35 100644 --- a/lib/avm/creator.rb +++ b/lib/avm/creator.rb @@ -75,12 +75,12 @@ module AVM def from_xml(image, document) contacts = [] document.get_refs do |refs| - refs[:dublin_core].search('.//rdf:li').each do |name| - contacts << { :name => name.text } + refs[:dublin_core].search('.//dc:creator//rdf:li').each do |name| + contacts << { :name => name.text.strip } end IPTC_MULTI_FIELD_MAP.each do |key, element_name| - if node = refs[:iptc].at_xpath("//Iptc4xmpCore:#{element_name}") + if node = refs[:iptc].at_xpath(".//Iptc4xmpCore:#{element_name}") node.text.split(',').collect(&:strip).each_with_index do |value, index| contacts[index][key] = value end diff --git a/lib/avm/image.rb b/lib/avm/image.rb index d4ebb52..281ee8d 100644 --- a/lib/avm/image.rb +++ b/lib/avm/image.rb @@ -11,12 +11,16 @@ module AVM class Image DUBLIN_CORE_FIELDS = [ :title, :description ] + PHOTOSHOP_SINGLES = { + 'Headline' => :headline, + 'DateCreated' => :date + } + AVM_SINGLE_FIELDS = [ 'Distance.Notes', 'Spectral.Notes', 'ReferenceURL', 'Credit', - 'Date', 'ID', 'Type', 'Image.ProductQuality', @@ -40,7 +44,6 @@ module AVM :spectral_notes, :reference_url, :credit, - :date, :id, :type, :quality, @@ -64,7 +67,6 @@ module AVM :spectral_notes, :reference_url, :credit, - :string_date, :id, :image_type, :image_quality, @@ -141,6 +143,7 @@ module AVM end refs[:photoshop].add_child(%{#{headline}}) + refs[:photoshop].add_child(%{#{string_date}}) AVM_SINGLES_FOR_MESSAGES.each do |tag, message| if value = send(message) @@ -229,14 +232,18 @@ module AVM end end - if node = refs[:photoshop].at_xpath('./photoshop:Headline') - options[:headline] = node.text + PHOTOSHOP_SINGLES.each do |tag, field| + if node = refs[:photoshop].at_xpath("./photoshop:#{tag}") + options[field] = node.text + end end if node = refs[:avm].at_xpath('./avm:Distance') list_values = node.search('.//rdf:li').collect { |li| li.text } case list_values.length + when 0 + options[:light_years] = node.text when 1 options[:light_years] = list_values.first when 2 diff --git a/lib/avm/node.rb b/lib/avm/node.rb new file mode 100644 index 0000000..465a265 --- /dev/null +++ b/lib/avm/node.rb @@ -0,0 +1,27 @@ +require 'delegate' + +module AVM + class Node < DelegateClass(Nokogiri::XML::Node) + def initialize(xmp, node) + @xmp, @node = xmp, node + super(@node) + end + + def at_xpath(path) + if node = @node.at_xpath(path, @xmp.namespaces) + self.class.new(@xmp, node) + else + nil + end + end + + def search(path) + self.class.from_nodeset(@xmp, @node.search(path, @xmp.namespaces)) + end + + def self.from_nodeset(xmp, nodeset) + nodeset.collect { |node| new(xmp, node) } + end + end +end + diff --git a/lib/avm/xmp.rb b/lib/avm/xmp.rb index e9bc4c4..50d763f 100644 --- a/lib/avm/xmp.rb +++ b/lib/avm/xmp.rb @@ -1,4 +1,5 @@ require 'nokogiri' +require 'avm/node' module AVM class XMP @@ -9,6 +10,15 @@ module AVM 'avm' => 'AVM' } + REQUIRED_NAMESPACES = { + :x => "adobe:ns:meta/", + :rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + :dc => "http://purl.org/dc/elements/1.1/", + :photoshop => "http://ns.adobe.com/photoshop/1.0/", + :avm => "http://www.communicatingastronomy.org/avm/1.0/", + :Iptc4xmpCore => "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/" + } + attr_reader :doc def initialize(doc = nil) @@ -25,37 +35,73 @@ module AVM new(Nokogiri::XML(string)) end + def ensure_xmlns(string) + string.gsub(%r{([ "adobe:ns:meta/", - :rdf => "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - :dc => "http://purl.org/dc/elements/1.1/", - :photoshop => "http://ns.adobe.com/photoshop/1.0/", - :avm => "http://www.communicatingastronomy.org/avm/1.0/", - :Iptc4xmpCore => "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/" - }.each do |namespace, url| - doc.root.add_namespace_definition(namespace.to_s, url) + existing = doc.document.collect_namespaces + + REQUIRED_NAMESPACES.each do |namespace, url| + doc.root.add_namespace_definition(namespace.to_s, url) if !existing.values.include?(url) end end def ensure_descriptions_findable! added = [] - doc.search('//rdf:Description').each do |description| + search('//rdf:Description').each do |description| if first_child = description.first_element_child if first_child.namespace prefix = first_child.namespace.prefix - description['rdf:about'] = PREFIXES[prefix] - added << prefix + if prefix_description = PREFIXES[prefix_map.index(prefix)] + description[self % 'rdf:about'] = prefix_description + added << prefix + end end end end + if !at_xpath('//rdf:RDF') + doc.first_element_child.add_child(self % '') + end + + PREFIXES.each do |prefix, about| if !added.include?(prefix) - doc.at_xpath('//rdf:RDF').add_child(%{}) + at_xpath('//rdf:RDF').add_child(self % %{}) end end end @@ -77,7 +123,7 @@ module AVM end def at_rdf_description(about) - @doc.at_xpath(%{//rdf:Description[@rdf:about="#{about}"]}) + AVM::Node.new(self, at_xpath(%{//rdf:Description[@rdf:about="#{about}"]})) end def empty_xml_doc diff --git a/ruby-avm-library.gemspec b/ruby-avm-library.gemspec index ef7efee..9d985b9 100644 --- a/ruby-avm-library.gemspec +++ b/ruby-avm-library.gemspec @@ -23,4 +23,5 @@ Gem::Specification.new do |s| s.add_development_dependency 'mocha' s.add_dependency 'nokogiri' + s.add_dependency 'thor' end diff --git a/spec/avm/cli_spec.rb b/spec/avm/cli_spec.rb new file mode 100644 index 0000000..e69de29 diff --git a/spec/avm/image_spec.rb b/spec/avm/image_spec.rb index 12bdc5d..d1a66d9 100644 --- a/spec/avm/image_spec.rb +++ b/spec/avm/image_spec.rb @@ -157,6 +157,15 @@ describe AVM::Image do end context "image in it" do + context 'distance as a single value, assume light years' do + let(:file_path) { 'spec/sample_files/image/single_value_light_years.xmp' } + + has_most_options + + its(:redshift) { should be_nil } + its(:light_years) { should == light_years } + end + context "distance in light years" do let(:file_path) { 'spec/sample_files/image/light_years.xmp' } @@ -197,7 +206,7 @@ describe AVM::Image do it "should have basic tags" do xml.at_xpath('//rdf:RDF').should_not be_nil xml.search('//rdf:RDF/rdf:Description').should_not be_empty - avm.at_xpath('./avm:Date').should be_nil + photoshop.at_xpath('./photoshop:DateCreated').should_not be_nil end end @@ -221,7 +230,7 @@ describe AVM::Image do xpath_text(avm, './avm:Spectral.Notes').should == spectral_notes xpath_text(avm, './avm:ReferenceURL').should == reference_url xpath_text(avm, './avm:Credit').should == credit - xpath_text(avm, './avm:Date').should == date + xpath_text(photoshop, './photoshop:DateCreated').should == date xpath_text(avm, './avm:ID').should == id xpath_text(avm, './avm:Type').should == type xpath_text(avm, './avm:Image.ProductQuality').should == image_quality diff --git a/spec/avm/xmp_spec.rb b/spec/avm/xmp_spec.rb index d2c30f3..b062122 100644 --- a/spec/avm/xmp_spec.rb +++ b/spec/avm/xmp_spec.rb @@ -24,11 +24,63 @@ describe AVM::XMP do end end - describe '.from_string' do + describe 'xml from string' do let(:xmp) { self.class.describes.from_string(string) } - let(:string) { '' } + let(:doc) { xmp.doc } - specify { xmp.doc.at_xpath('//node').should_not be_nil } + describe '.from_string' do + let(:string) { '' } + + specify { xmp.doc.at_xpath('//node').should_not be_nil } + end + + describe '#ensure_namespaces! and #ensure_xmlns' do + let(:rdf_namespace) { AVM::XMP::REQUIRED_NAMESPACES[:rdf] } + + def self.all_default_namespaces + it "should have all the namespaces with the default prefixes" do + namespaces = doc.document.collect_namespaces + + namespaces_to_test = AVM::XMP::REQUIRED_NAMESPACES.dup + yield namespaces_to_test if block_given? + + namespaces_to_test.each do |prefix, namespace| + if namespace + namespaces["xmlns:#{prefix}"].should == namespace + end + end + end + end + + before { doc } + + context 'none of the namespaces exist' do + let(:string) { '' } + + all_default_namespaces + + specify { xmp.ensure_xmlns('.//rdf:what').should == './/rdf:what' } + end + + context 'one namespace exists with the same prefix' do + let(:string) { %{} } + + all_default_namespaces + + specify { xmp.ensure_xmlns('.//rdf:what').should == './/rdf:what' } + end + + context 'one namespace exists with a different prefix' do + let(:string) { %{} } + + all_default_namespaces { |namespaces| + namespaces.delete(:rdf) + namespaces[:r] = AVM::XMP::REQUIRED_NAMESPACES[:rdf] + } + + specify { xmp.ensure_xmlns('.//rdf:what').should == './/r:what' } + end + end end describe '#ensure_descriptions_findable!' do @@ -46,7 +98,7 @@ describe AVM::XMP do let(:content) { '' } [ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which| - specify { xmp.doc.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).children.should be_empty } + specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).children.should be_empty } end end @@ -67,8 +119,28 @@ describe AVM::XMP do XML [ 'Dublin Core', 'IPTC', 'Photoshop', 'AVM' ].each do |which| - specify { xmp.doc.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).should_not be_nil } + specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="#{which}"]}).should_not be_nil } end end + + context 'has a namespace it should know about with a different prefix' do + let(:content) { <<-XML } + + + + XML + + specify { xmp.at_xpath(%{//rdf:Description[@rdf:about="Dublin Core"]}).should_not be_nil } + end + + context 'has a namespace it knows nothing about' do + let(:content) { <<-XML } + + + + XML + + it { expect { xmp }.to_not raise_error } + end end end diff --git a/spec/sample_files/creator/one_creator.xmp b/spec/sample_files/creator/one_creator.xmp index 8518979..0e923eb 100644 --- a/spec/sample_files/creator/one_creator.xmp +++ b/spec/sample_files/creator/one_creator.xmp @@ -5,6 +5,11 @@ John Bintz + + + This is an image + + diff --git a/spec/sample_files/image/both.xmp b/spec/sample_files/image/both.xmp index 79f615c..228fce5 100644 --- a/spec/sample_files/image/both.xmp +++ b/spec/sample_files/image/both.xmp @@ -22,6 +22,7 @@ Headline + 2010-01-01 @@ -29,7 +30,6 @@ Distance Notes Reference URL Credit - 2010-01-01 ID Observation Good diff --git a/spec/sample_files/image/light_years.xmp b/spec/sample_files/image/light_years.xmp index 01c25e0..a2c0e51 100644 --- a/spec/sample_files/image/light_years.xmp +++ b/spec/sample_files/image/light_years.xmp @@ -21,6 +21,7 @@ Headline + 2010-01-01 @@ -28,7 +29,6 @@ Distance Notes Reference URL Credit - 2010-01-01 ID Observation Good diff --git a/spec/sample_files/image/redshift.xmp b/spec/sample_files/image/redshift.xmp index 0023b24..5106eea 100644 --- a/spec/sample_files/image/redshift.xmp +++ b/spec/sample_files/image/redshift.xmp @@ -22,6 +22,7 @@ Headline + 2010-01-01 @@ -29,7 +30,6 @@ Distance Notes Reference URL Credit - 2010-01-01 ID Observation Good diff --git a/spec/sample_files/image/single_value_light_years.xmp b/spec/sample_files/image/single_value_light_years.xmp new file mode 100644 index 0000000..ec32efe --- /dev/null +++ b/spec/sample_files/image/single_value_light_years.xmp @@ -0,0 +1,77 @@ + + + + : + + My title + + + + + Description + + + + + + + + + Headline + 2010-01-01 + + + Spectral Notes + Distance Notes + Reference URL + Credit + ID + Observation + Good + Light years + ICRS + 100 + + + 100 + 50 + + + + + 200 + 150 + + + + + 25 + 15 + + + + + 40 + 35 + + + 10 + TAN + Full + Spatial Notes + FITS header + + + 1 + 2 + 3 + 4 + + + + +