Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ GEM
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unicode-display_width (2.6.0)
uri (1.0.3)
uri (1.0.4)
zeitwerk (2.7.2)

PLATFORMS
Expand Down
6 changes: 6 additions & 0 deletions lib/folio_api_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def with_token_refresh_attempt_when_unauthorized
# one time in responde to a 401 or 403.
refresh_auth_token!

# Re-run block
yield
rescue Faraday::ConnectionFailed
# If we make too many rapid requests in a row, FOLIO sometimes disconnects.
# If this happens, we'll sleep for a little while and retry the request.
sleep 5
# Re-run block
yield
end
Expand Down
52 changes: 42 additions & 10 deletions lib/folio_api_client/finders.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,25 +90,49 @@ def find_source_record(instance_record_id: nil, instance_record_hrid: nil)
source_record_search_results['sourceRecords'].first
end

def find_source_marc_records(modified_since: nil, with_965_value: nil, &block)
query = marc_records_query(modified_since: modified_since, with_965_value: with_965_value)
# Retrieve and yield marc records, filtered by the given modified_since and with_965_value parameters.
# NOTE: This method skips staff-suppressed FOLIO records.
def find_source_marc_records(modified_since: nil, with_965_value: nil, &block) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
# FOLIO does not allow an offset value higher than 9999, but this method needs to be able to retrieve
# result sets that have more than 9999 results, so we break big queries up into a bunch of smaller
# queries that split up the results based on the first character of the instance UUIDs. The first character
# of a UUID is a hex character (0-9 or a-f), which means that we will perform 16 different searches
# (i.e. "all of the results that have instance ids that start with a", then "all of the results that have
# instance ids that start with b", and so on).

# Since we're splitting up the query into a bunch of different sub-queries, we need to do a
# non-prefix-filtered query first just to get the total number of results.
total_query = response = self.get(
'search/instances',
marc_records_query(modified_since: modified_since, with_965_value: with_965_value).merge({ limit: 0 })
)
total_records = total_query['totalRecords']

loop do
response = self.get('search/instances', query)
process_marc_for_instance(response['instances'], &block) if block
break if (query[:offset] + query[:limit]) >= response['totalRecords']
with_uuid_prefixes do |uuid_prefix|
query = marc_records_query(modified_since: modified_since, with_965_value: with_965_value,
uuid_prefix: uuid_prefix)
loop do
response = self.get('search/instances', query)
process_marc_for_instance(response['instances'], total_records, &block) if block
break if (query[:offset] + query[:limit]) >= response['totalRecords']

query[:offset] += query[:limit]
query[:offset] += query[:limit]
end
end
end

def process_marc_for_instance(instances, &block)
# UUIDs can start with
def with_uuid_prefixes(&block)
(('0'..'9').to_a + ('a'..'f').to_a).each(&block)
end

def process_marc_for_instance(instances, total_records, &block)
instances.each do |instance|
source_record = find_source_record(instance_record_id: instance['id'])
next if source_record.nil? # Occasionally, we find an instance record without a source record. Skip these.

marc_content = source_record.dig('parsedRecord', 'content')
yield(marc_content) if marc_content && block
yield(marc_content, total_records) if marc_content && block
end
end

Expand All @@ -135,7 +159,7 @@ def location_record_query(code: nil)
'Missing query field. Must supply a code.'
end

def marc_records_query(modified_since: nil, with_965_value: nil) # rubocop:disable Metrics/MethodLength
def marc_records_query(modified_since: nil, with_965_value: nil, uuid_prefix: nil) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity
params = { limit: 100, offset: 0 }

if modified_since.nil? && with_965_value.nil?
Expand All @@ -152,6 +176,14 @@ def marc_records_query(modified_since: nil, with_965_value: nil) # rubocop:disab
query_parts << "metadata.updatedDate>=\"#{modified_since}\"" if modified_since
query_parts << %(identifiers.value="#{with_965_value}") if with_965_value

# Only include non-staff-suppressed records because staff-suppressed records represent deleted records in FOLIO.
# Reminder: staff-suppressed records are NOT the same as discovery-suppressed records. Discovery-suppressed
# records are ones that aren't displayed to the public, and we DO want to include discovery-suppressed records
# in the results returned by this query.
query_parts << 'staffSuppress==false'

query_parts << %(id="#{uuid_prefix}*") if uuid_prefix

params[:query] = query_parts.join(' and ')
params
end
Expand Down
177 changes: 157 additions & 20 deletions spec/folio_api_client/finders_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -331,28 +331,117 @@
end
end

describe '#with_uuid_prefixes' do
it 'yields the expected prefixes' do
expect { |b|
instance.with_uuid_prefixes(&b)
}.to yield_successive_args(
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'a', 'b', 'c', 'd', 'e', 'f'
)
end
end

describe '#find_source_marc_records' do
let(:modified_since) { '2025-01-01T00:00:00Z' }
let(:value_965) { '965hyacinth' }
let(:marc_content) { { 'fields' => [{ '001' => '12345' }] } }
let(:instance_record) { { 'id' => 'instance-123' } }
let(:instances_response) { { 'totalRecords' => 1, 'instances' => [instance_record] } }
let(:instance_record_1) { { 'id' => 'instance-123' } }
let(:instance_record_2) { { 'id' => 'instance-456' } }
let(:instances) { [instance_record_1, instance_record_2] }
let(:total_records) { instances.length }
let(:instances_response) { { 'totalRecords' => total_records, 'instances' => instances } }
let(:uuid_prefixes) do
arr = []
instance.with_uuid_prefixes { |uuid_prefix| arr << uuid_prefix }
arr
end

before do
allow(instance).to receive(:find_source_record).and_return({ 'parsedRecord' => { 'content' => marc_content } })
end

context 'with modified_since parameter' do
before do
# Allow first query for getting total result count
allow(instance).to receive(:get).with(
'search/instances', { query: "metadata.updatedDate>=\"#{modified_since}\"", limit: 100, offset: 0 }
'search/instances',
{ query: "metadata.updatedDate>=\"#{modified_since}\" and staffSuppress==false", limit: 0, offset: 0 }
).and_return(instances_response)

# Allow uuid-prefix-bucket queries for aggregating actual results
uuid_prefixes.each do |uuid_prefix|
allow(instance).to receive(:get).with(
'search/instances',
{
query: %(metadata.updatedDate>="#{modified_since}" and staffSuppress==false and id="#{uuid_prefix}*"),
limit: 100,
offset: 0
}
).and_return(instances_response)
end
end

it 'yields MARC content for each source record' do
yielded_records = []
instance.find_source_marc_records(modified_since: modified_since) { |record| yielded_records << record }
expect(yielded_records).to eq([marc_content])
it 'retrieves records in batches based on uuid prefix' do
# Expect the first /search/instances query to be one that just fetches the total record count
# (and has a limit of 0 because we don't need results to be returned for the record count query).
expect(instance).to receive(:get).with(
'search/instances',
{ query: "metadata.updatedDate>=\"#{modified_since}\" and staffSuppress==false", limit: 0, offset: 0 }
)

# Then expect queries for each uuid prefix batch
uuid_prefixes.each do |uuid_prefix|
expect(instance).to receive(:get).with(
'search/instances',
{
query: %(metadata.updatedDate>="#{modified_since}" and staffSuppress==false and id="#{uuid_prefix}*"),
limit: 100,
offset: 0
}
).and_return(instances_response)
end

instance.find_source_marc_records(modified_since: modified_since) { |_| }
end

it 'yields MARC content for each source record' do # rubocop:disable RSpec/ExampleLength
expect { |b|
instance.find_source_marc_records(modified_since: modified_since, &b)
}.to yield_successive_args(
[marc_content, total_records], # batch 0, record 1
[marc_content, total_records], # batch 0, record 2
[marc_content, total_records], # batch 1, record 1
[marc_content, total_records], # batch 1, record 2
[marc_content, total_records], # batch 2, record 1
[marc_content, total_records], # batch 2, record 2
[marc_content, total_records], # batch 3, record 1
[marc_content, total_records], # batch 3, record 2
[marc_content, total_records], # batch 4, record 1
[marc_content, total_records], # batch 4, record 2
[marc_content, total_records], # batch 5, record 1
[marc_content, total_records], # batch 5, record 2
[marc_content, total_records], # batch 6, record 1
[marc_content, total_records], # batch 6, record 2
[marc_content, total_records], # batch 7, record 1
[marc_content, total_records], # batch 7, record 2
[marc_content, total_records], # batch 8, record 1
[marc_content, total_records], # batch 8, record 2
[marc_content, total_records], # batch 9, record 1
[marc_content, total_records], # batch 9, record 2
[marc_content, total_records], # batch a, record 1
[marc_content, total_records], # batch a, record 2
[marc_content, total_records], # batch b, record 1
[marc_content, total_records], # batch b, record 2
[marc_content, total_records], # batch c, record 1
[marc_content, total_records], # batch c, record 2
[marc_content, total_records], # batch d, record 1
[marc_content, total_records], # batch d, record 2
[marc_content, total_records], # batch e, record 1
[marc_content, total_records], # batch e, record 2
[marc_content, total_records], # batch f, record 1
[marc_content, total_records] # batch f, record 2
)
end

it 'raises an exception if the given modified_since parameter is an invalid format' do
Expand All @@ -364,37 +453,85 @@

context 'with with_965_value parameter' do
before do
# Allow first query for getting total result count
allow(instance).to receive(:get).with(
'search/instances', { query: %(identifiers.value="#{value_965}"), limit: 100, offset: 0 }
'search/instances',
{ query: %(identifiers.value="#{value_965}" and staffSuppress==false), limit: 0, offset: 0 }
).and_return(instances_response)

# Allow uuid-prefix-bucket queries for aggregating actual results
uuid_prefixes.each do |uuid_prefix|
allow(instance).to receive(:get).with(
'search/instances',
{
query: %(identifiers.value="#{value_965}" and staffSuppress==false and id="#{uuid_prefix}*"),
limit: 100,
offset: 0
}
).and_return(instances_response)
end
end

it 'uses the correct query' do
instance.find_source_marc_records(with_965_value: value_965) { |_| }
expect(instance).to have_received(:get).with(
'search/instances', { query: 'identifiers.value="965hyacinth"', limit: 100, offset: 0 }
)

# Then expect queries for each uuid prefix batch
uuid_prefixes.each do |uuid_prefix|
expect(instance).to have_received(:get).with(
'search/instances',
{
query: %(identifiers.value="#{value_965}" and staffSuppress==false and id="#{uuid_prefix}*"),
limit: 100,
offset: 0
}
)
end
end
end

context 'with modified_since and with_965_value parameters' do
before do
# Allow first query for getting total result count
allow(instance).to receive(:get).with(
'search/instances', {
query: %(metadata.updatedDate>="#{modified_since}" and identifiers.value="#{value_965}"),
limit: 100, offset: 0
'search/instances',
{
query: %(metadata.updatedDate>="#{modified_since}" and identifiers.value="#{value_965}" and staffSuppress==false), # rubocop:disable Layout/LineLength
limit: 0,
offset: 0
}
).and_return(instances_response)

puts %(
multiline!
cool!
cool!
)

# Allow uuid-prefix-bucket queries for aggregating actual results
uuid_prefixes.each do |uuid_prefix|
allow(instance).to receive(:get).with(
'search/instances',
{
query: %(metadata.updatedDate>="#{modified_since}" and identifiers.value="#{value_965}" and staffSuppress==false and id="#{uuid_prefix}*"), # rubocop:disable Layout/LineLength
limit: 100,
offset: 0
}
).and_return(instances_response)
end
end

it 'combines both filters' do
instance.find_source_marc_records(modified_since: modified_since, with_965_value: value_965) { |_| }
expect(instance).to have_received(:get).with(
'search/instances', {
query: %(metadata.updatedDate>="#{modified_since}" and identifiers.value="#{value_965}"),
limit: 100, offset: 0
}
)

# Then expect queries for each uuid prefix batch
uuid_prefixes.each do |uuid_prefix|
expect(instance).to have_received(:get).with(
'search/instances', {
query: %(metadata.updatedDate>="#{modified_since}" and identifiers.value="#{value_965}" and staffSuppress==false and id="#{uuid_prefix}*"), # rubocop:disable Layout/LineLength
limit: 100, offset: 0
}
)
end
end
end

Expand Down