Commits

Anonymous committed 32ef937

add md5 sums automatically to ingested objects

Comments (0)

Files changed (4)

lib/mrt/ingest/iobject.rb

 
 module Mrt
   module Ingest
+    class Component
+      def initialize(server, where, options)
+        @name = options[:name]
+        @digest = options[:digest]
+        @mime_type = options[:mime_type]
+        @size = options[:size]
+        
+        case where
+        when File, Tempfile
+          @name = File.basename(where.path) if @name.nil?
+          @uri = server.add_file(where)
+          if @digest.nil? then
+            @digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
+          end
+          @size = File.size(where.path) if @size.nil?
+        when URI
+          @name = File.basename(where.to_s) if @name.nil?
+          @uri = where
+        else
+          raise IngestException.new("Trying to add a component that is not a File or URI")
+        end
+        
+      end
+      
+      def to_manifest_entry
+        (digest_alg, digest_value) = if @digest.nil? then
+                                       ['', '']
+                                     else
+                                       [@digest.type, @digest.value]
+                                     end
+        return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
+      end
+    end
+    
     # An object ready for ingest into Merritt.
     class IObject
       
         @server = options[:server] || Mrt::Ingest::OneTimeServer.new
       end
       
-      def add_component(component, name=nil)
-        case component
-        when File, Tempfile
-          name = File.basename(component.path) if name.nil?
-          @components.push([@server.add_file(component), name])
-        when URI
-          name = File.basename(component.to_s) if name.nil?
-          @components.push([component, name])
-        else
-          raise IngestException.new("Trying to add a component that is not a File or URI")
-        end
+      def add_component(where, options={})
+        @components.push(Component.new(@server, where, options))
       end
       
       # Make a Mrt::Ingest::Request object for this mrt-object
       def mk_request(profile, submitter)
-        erc_url = case @erc
-                  when URI
-                    @erc
-                  when File, Tempfile
-                    @server.add_file(@erc)
-                  when Hash
-                    @server.add_file do |f|
-                      @erc.each do |k, v|
-                        f.write("#{k}: #{v}\n")
-                      end
-                    end
-                  end
+        erc_component = case @erc
+                        when URI, File, Tempfile
+                          Component.new(@server, @erc, :name => 'mrt-erc.txt')
+                        when Hash
+                          uri_str, path = @server.add_file do |f|
+                            @erc.each do |k, v|
+                              f.write("#{k}: #{v}\n")
+                            end
+                          end
+                          Component.new(@server, 
+                                        URI.parse(uri_str), 
+                                        :name => 'mrt-erc.txt',
+                                        :digest => Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path)))
+                        end
         manifest_file = Tempfile.new("mrt-ingest")
-        mk_manifest(manifest_file, erc_url)
+        mk_manifest(manifest_file, erc_component)
         # reset to beginning
         manifest_file.open
         return Mrt::Ingest::Request.
         return @server.stop_server()
       end
         
-      def mk_manifest(manifest, erc_url)
+      def mk_manifest(manifest, erc_component)
         manifest.write("#%checkm_0.7\n")
         manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
         manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
         manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
         manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
         @components.each { |c|
-          manifest.write("#{c[0]} | | | | | #{c[1]} | \n")
+          manifest.write(c.to_manifest_entry)
         }
-        manifest.write("#{erc_url} | | | | | mrt-erc.txt | \n")
+        manifest.write(erc_component.to_manifest_entry)
         manifest.write("#%EOF\n")
       end
       

lib/mrt/ingest/message_digest.rb

 # Author::    Erik Hetzner  (mailto:erik.hetzner@ucop.edu)
 # Copyright:: Copyright (c) 2011, Regents of the University of California
 
+require 'digest/md5'
+
 module Mrt
   module Ingest
     module MessageDigest
       class Base
         attr_reader :value
-        def init(value)
+        def initialize(value)
           @value = value
         end
       end
       
-      class SHA256
+      class SHA256 < Base
         def type
           return "sha-256"
         end
       end
 
-      class MD5
+      class MD5 < Base
         def type
           return "md5"
         end
+        
+        def self.from_file(file)
+          digest = Digest::MD5.new
+          File.open(file.path, 'r') do |f|
+            buff = ""
+            while (f.read(1024, buff) != nil)
+              digest << buff
+            end
+          end
+          return Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
+        end
       end
 
-      class SHA1
+      class SHA1 < Base
         def type
           return "sha-1"
         end

lib/mrt/ingest/one_time_server.rb

         return get_temppath
       end
 
-      # Add a file to this server. Returns the URL to use to fetch the
-      # file.
+      # Add a file to this server. Returns two values: the URL to use
+      # to fetch the file, and the full path of the file.
       def add_file(sourcefile=nil)
         fullpath = get_temppath()
         path = File.basename(fullpath)
             yield f
           end
         end
-        return "http://#{Socket.gethostname}:#{@port}/#{path}"
+        return "http://#{Socket.gethostname}:#{@port}/#{path}", fullpath
       end
       
       def start_server

test/test_iobject.rb

     end
     
     should "be able to add a URI component" do
-      @iobject.add_component(URI.parse("http://example.org/file"), "file")
+      @iobject.add_component(URI.parse("http://example.org/file"))
     end
 
     should "not be able to add a non-URI component" do
       assert_raise(Mrt::Ingest::IngestException) do
-        @iobject.add_component("http://example.org/file", "file")
+        @iobject.add_component("http://example.org/file")
       end
     end
     
 Hello, world!
 EOS
 
+  FILE_CONTENT_MD5 = "746308829575e17c3331bbcb00c0898b"
+
   context "serving local files" do
     should "be able to add a local file component" do
       iobject = Mrt::Ingest::IObject.new
       tempfile = write_to_tempfile(FILE_CONTENT)
-      iobject.add_component(tempfile, "helloworld")
+      iobject.add_component(tempfile, {:name => "helloworld" })
       uri_entry = get_uri_for_name(iobject, "helloworld")
       erc_entry = get_uri_for_name(iobject, "mrt-erc.txt")
+      manifest = parse_object_manifest(iobject)
       if uri_entry.nil?
         assert(false, "Could not find hosted file URI!")
       else