diff --git a/README.md b/README.md index 6cf32dd5..ca2854e8 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,8 @@ We use AWS SQS queues to publish theses to DSpace and read data about published `DSPACE_DOCTORAL_HANDLE` - The handle for the collection to use for depositing Doctoral theses. `DSPACE_GRADUATE_HANDLE` - The handle for the collection to use for depositing Graduate theses. `DSPACE_UNDERGRADUATE_HANDLE` - The handle for the collection to use for depositing Undergraduate theses. - +`DSPACE_V8_METADATA` - Toggle metadata format for publication payloads. Set to `true` for DSpace 8 format. +Set to `false` for DSpace 6 `metadata` array format. Default is `false`. `SQS_INPUT_QUEUE_URL` - The URL of the SQS input queue used for publication to DSpace. `SQS_OUTPUT_QUEUE_NAME` - The name of the SQS output queue. This is used to build the SQS message attributes. `SQS_OUTPUT_QUEUE_URL` - The URL of the SQS output queue used to read the results from a publication run. diff --git a/app.json b/app.json index 2ad22ae7..48ff6c5e 100644 --- a/app.json +++ b/app.json @@ -17,6 +17,7 @@ "AWS_SECRET_ACCESS_KEY": { "required": true }, + "DSPACE_V8_METADATA": "false", "DISABLE_ALL_EMAIL": "true", "FAKE_AUTH_ENABLED": "true", "HEROKU_APP_NAME": { diff --git a/app/models/dspace_metadata.rb b/app/models/dspace_metadata.rb index 90bc53e1..624c8a1f 100644 --- a/app/models/dspace_metadata.rb +++ b/app/models/dspace_metadata.rb @@ -4,9 +4,9 @@ class DspaceMetadata def initialize(thesis) - @dc = {}.compare_by_identity - @dc['dc.publisher'] = 'Massachusetts Institute of Technology' - @dc['dc.type'] = 'Thesis' + @metadata_entries = [] + add_metadata('dc.publisher', 'Massachusetts Institute of Technology') + add_metadata('dc.type', 'Thesis') title(thesis) contributors(thesis.users, thesis.advisors) departments(thesis.departments) @@ -17,22 +17,26 @@ def initialize(thesis) # Generates JSON metadata file required for submission to DSS. 
def serialize_dss_metadata - { 'metadata' => @dc.map { |k, v| { 'key' => k, 'value' => v } } }.to_json + if Flipflop.enabled?(:dspace_v8_metadata) + serialize_dspace8.to_json + else + { 'metadata' => serialize_dspace6 }.to_json + end end def title(thesis) - @dc['dc.title'] = thesis.title - @dc['dc.description.abstract'] = thesis.abstract if thesis.abstract - @dc['dc.date.issued'] = thesis.grad_date.strftime('%Y-%m') + add_metadata('dc.title', thesis.title) + add_metadata('dc.description.abstract', thesis.abstract) if thesis.abstract + add_metadata('dc.date.issued', thesis.grad_date.strftime('%Y-%m')) end def contributors(thesis_users, thesis_advisors) thesis_users.each do |a| - @dc['dc.contributor.author'] = a.preferred_name + add_metadata('dc.contributor.author', a.preferred_name) end parse_orcids(thesis_users) thesis_advisors.each do |adv| - @dc['dc.contributor.advisor'] = adv.name + add_metadata('dc.contributor.advisor', adv.name) end end @@ -44,49 +48,98 @@ def parse_orcids(thesis_users) return unless orcids.present? orcids.each do |orcid| - @dc['dc.identifier.orcid'] = orcid + add_metadata('dc.identifier.orcid', orcid) end end def departments(thesis_depts) thesis_depts.each do |d| - @dc['dc.contributor.department'] = d.name_dspace + add_metadata('dc.contributor.department', d.name_dspace) end end def degrees(thesis_degrees) thesis_degrees.each do |degree| - @dc['dc.description.degree'] = degree.abbreviation - @dc['thesis.degree.name'] = degree.name_dspace + add_metadata('dc.description.degree', degree.abbreviation) + add_metadata('thesis.degree.name', degree.name_dspace) end # Degree types should not be repeated if they are the same type. 
types = thesis_degrees.map { |degree| degree.degree_type.name }.uniq types.each do |t| - @dc['mit.thesis.degree'] = t + add_metadata('mit.thesis.degree', t) end end def copyright(thesis_copyright, thesis_license) if thesis_copyright.holder != 'Author' # copyright holder is anyone but author - @dc['dc.rights'] = thesis_copyright.statement_dspace - @dc['dc.rights'] = "Copyright #{thesis_copyright.holder}" - @dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url + add_metadata('dc.rights', thesis_copyright.statement_dspace) + add_metadata('dc.rights', "Copyright #{thesis_copyright.holder}") + add_metadata('dc.rights.uri', thesis_copyright.url) if thesis_copyright.url elsif thesis_license # author holds copyright and provides a license - @dc['dc.rights'] = thesis_license.map_license_type - @dc['dc.rights'] = 'Copyright retained by author(s)' + add_metadata('dc.rights', thesis_license.map_license_type) + add_metadata('dc.rights', 'Copyright retained by author(s)') # Theoretically both license and copyright URLs are required for publication, but there are no constraints on # the models, and we want to future-proof this. 
- @dc['dc.rights.uri'] = thesis_license.evaluate_license_url + add_metadata('dc.rights.uri', thesis_license.evaluate_license_url) else # author holds copyright and no license provided - @dc['dc.rights'] = thesis_copyright.statement_dspace - @dc['dc.rights'] = 'Copyright retained by author(s)' - @dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url + add_metadata('dc.rights', thesis_copyright.statement_dspace) + add_metadata('dc.rights', 'Copyright retained by author(s)') + add_metadata('dc.rights.uri', thesis_copyright.url) if thesis_copyright.url end end def date_transferred(files) - @dc['dc.date.submitted'] = files.select { |file| file.purpose == 'thesis_pdf' }.first.blob.created_at + add_metadata('dc.date.submitted', files.select { |file| file.purpose == 'thesis_pdf' }.first.blob.created_at) + end + + private + + def add_metadata(key, value) + return if value.nil? + + @metadata_entries << { 'key' => key, 'value' => value } + end + + # DSpace 6 expects metadata to be sent as a flat array of key/value pairs under + # a top-level "metadata" key (added by serialize_dss_metadata). + # + # Example returned by this method: + # [ + # { 'key' => 'dc.title', 'value' => 'My Thesis' }, + # { 'key' => 'dc.contributor.author', 'value' => 'Student, Second' }, + # { 'key' => 'dc.contributor.author', 'value' => 'Student, Third' } + # ] + def serialize_dspace6 + @metadata_entries + end + + # DSpace 8 expects top-level metadata keys, where each key maps to an array of + # value objects. We convert from our internal flat entries so both DSpace 6 and + # DSpace 8 serializers can share the same source data. + # + # Example returned by this method: + # { + # 'dc.title' => [{ 'value' => 'My Thesis' }], + # 'dc.contributor.author' => [ + # { 'value' => 'Student, Second' }, + # { 'value' => 'Student, Third' } + # ] + # } + # + # Note: language is intentionally omitted for now (out of scope). 
+ def serialize_dspace8 + result = {} + + @metadata_entries.each do |entry| + key = entry['key'] + value = entry['value'] + + result[key] ||= [] + result[key] << { 'value' => value } + end + + result end end diff --git a/config/features.rb b/config/features.rb index aa4b3509..9fa97145 100644 --- a/config/features.rb +++ b/config/features.rb @@ -6,4 +6,9 @@ feature :maintenance_mode, default: ENV.fetch('MAINTENANCE_MODE', false), description: "Put application in maintenance mode, disabling file transfer uploads." + + # ENV values are strings and the string "false" is truthy, so compare explicitly against 'true'. + feature :dspace_v8_metadata, + default: ENV.fetch('DSPACE_V8_METADATA', 'false').casecmp?('true'), + description: "Use DSpace 8 metadata format instead of DSpace 6 metadata format." end diff --git a/test/models/dspace_metadata_test.rb b/test/models/dspace_metadata_test.rb index 2ca86e2b..6546c4cd 100644 --- a/test/models/dspace_metadata_test.rb +++ b/test/models/dspace_metadata_test.rb @@ -1,6 +1,10 @@ require 'test_helper' class DspaceMetadataTest < ActiveSupport::TestCase + setup do + Flipflop::FeatureSet.current.test!.switch!(:dspace_v8_metadata, false) + end + # Attaching thesis file so tests will pass def dss_friendly_thesis(thesis) file = Rails.root.join('test', 'fixtures', 'files', 'a_pdf.pdf') @@ -347,4 +351,40 @@ def dss_friendly_thesis(thesis) assert_equal unserialized['metadata'].first, { 'key' => 'dc.publisher', 'value' => 'Massachusetts Institute of Technology' } end + + test 'metadata serializes in DSpace 6 format when feature flag is disabled' do + test_strategy = Flipflop::FeatureSet.current.test! 
+ test_strategy.switch!(:dspace_v8_metadata, false) + + t = theses(:one) + dss_friendly_thesis(t) + serialized = DspaceMetadata.new(t).serialize_dss_metadata + unserialized = JSON.parse(serialized) + + assert_equal ['metadata'], unserialized.keys + assert_kind_of Array, unserialized['metadata'] + assert unserialized['metadata'].include?({ 'key' => 'dc.title', 'value' => 'MyString' }) + end + + test 'metadata serializes in DSpace 8 format when feature flag is enabled' do + test_strategy = Flipflop::FeatureSet.current.test! + test_strategy.switch!(:dspace_v8_metadata, true) + + t = Thesis.create(title: 'Who cares', graduation_year: '2021', graduation_month: 'February', + advisors: [advisors(:first), advisors(:second)], + users: [users(:second), users(:third)], + degrees: [degrees(:one), degrees(:two)], + departments: [departments(:one), departments(:two)], + copyright: copyrights(:mit)) + dss_friendly_thesis(t) + serialized = DspaceMetadata.new(t).serialize_dss_metadata + unserialized = JSON.parse(serialized) + + refute unserialized.key?('metadata') + assert_kind_of Array, unserialized['dc.contributor.author'] + assert_equal({ 'value' => 'Student, Second' }, unserialized['dc.contributor.author'].first) + assert_includes unserialized['dc.contributor.author'], { 'value' => 'Student, Third' } + assert_includes unserialized['dc.contributor.advisor'], { 'value' => 'Addy McAdvisor' } + assert_includes unserialized['dc.contributor.advisor'], { 'value' => 'Viola McAdvisor' } + end end