diff --git a/Gemfile b/Gemfile index 78af56e..f957415 100644 --- a/Gemfile +++ b/Gemfile @@ -43,6 +43,8 @@ group :development, :test do end gem "avo", ">= 3.2" +gem "csv", "~> 3.3" + gem "devise", "~> 4.9" gem "importmap-rails", "~> 2.1" @@ -56,6 +58,8 @@ gem "dotenv", groups: [ :development, :test ] gem "feedjira", "~> 3.2" +gem "fugit", "~> 1.11" + gem "http", "~> 5.3" gem "iconv", "~> 1.1" diff --git a/Gemfile.lock b/Gemfile.lock index a62221f..2b3c7d4 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -132,6 +132,7 @@ GEM concurrent-ruby (1.3.5) connection_pool (2.5.3) crass (1.0.6) + csv (3.3.5) date (3.4.1) debug (1.11.0) irb (~> 1.10) @@ -428,10 +429,12 @@ DEPENDENCIES bootsnap brakeman commonmarker (~> 2.3) + csv (~> 3.3) debug devise (~> 4.9) dotenv feedjira (~> 3.2) + fugit (~> 1.11) good_job (~> 4.11) http (~> 5.3) iconv (~> 1.1) diff --git a/app/avo/resources/statcan_dataset.rb b/app/avo/resources/statcan_dataset.rb new file mode 100644 index 0000000..7c25498 --- /dev/null +++ b/app/avo/resources/statcan_dataset.rb @@ -0,0 +1,16 @@ +class Avo::Resources::StatcanDataset < Avo::BaseResource + # self.includes = [] + # self.attachments = [] + # self.search = { + # query: -> { query.ransack(id_eq: params[:q], m: "or").result(distinct: false) } + # } + + def fields + field :id, as: :id + field :statcan_url, as: :text + field :name, as: :text + field :sync_schedule, as: :text + field :current_data, as: :code + field :last_synced_at, as: :date_time + end +end diff --git a/app/controllers/avo/statcan_datasets_controller.rb b/app/controllers/avo/statcan_datasets_controller.rb new file mode 100644 index 0000000..4b0b7a2 --- /dev/null +++ b/app/controllers/avo/statcan_datasets_controller.rb @@ -0,0 +1,4 @@ +# This controller has been generated to enable Rails' resource routes. +# More information on https://docs.avohq.io/3.0/controllers.html +class Avo::StatcanDatasetsController < Avo::ResourcesController +end diff --git a/app/controllers/statcan_datasets_controller.rb b/app/controllers/statcan_datasets_controller.rb new file mode 100644 index 0000000..429c6dc --- /dev/null +++ b/app/controllers/statcan_datasets_controller.rb @@ -0,0 +1,6 @@ +class StatcanDatasetsController < ApplicationController + def show + dataset = StatcanDataset.find(params[:id]) + render json: dataset + end +end diff --git a/app/jobs/statcan_cron_job.rb b/app/jobs/statcan_cron_job.rb new file mode 100644 index 0000000..48bfdc4 --- /dev/null +++ b/app/jobs/statcan_cron_job.rb @@ -0,0 +1,11 @@ +class StatcanCronJob < ApplicationJob + queue_as :default + + def perform(current_time = Time.current) + datasets = StatcanDataset.select(:id, :sync_schedule, :last_synced_at) + stale_datasets = StatcanDataset.filter_stale(datasets, current_time) + stale_datasets.each(&StatcanSyncJob.method(:perform_later)) + + Rails.logger.info "Enqueued #{stale_datasets.count} Statcan sync jobs" + end +end diff --git a/app/jobs/statcan_sync_job.rb b/app/jobs/statcan_sync_job.rb new file mode 100644 index 0000000..1fe719b --- /dev/null +++ b/app/jobs/statcan_sync_job.rb @@ -0,0 +1,7 @@ +class StatcanSyncJob < ApplicationJob + queue_as :default + + def perform(statcan_dataset) + statcan_dataset.sync! + end +end diff --git a/app/models/statcan_dataset.rb b/app/models/statcan_dataset.rb new file mode 100644 index 0000000..c095152 --- /dev/null +++ b/app/models/statcan_dataset.rb @@ -0,0 +1,35 @@ +class StatcanDataset < ApplicationRecord + validates :statcan_url, presence: true, uniqueness: true, format: { with: URI::DEFAULT_PARSER.make_regexp } + validates :name, presence: true, uniqueness: true, format: { with: /\A[a-z0-9-]+\z/, message: "must be lowercase with hyphens only" } + validates :sync_schedule, presence: true + validate :valid_cron_expression + + def self.filter_stale(datasets, current_time = Time.current) + datasets.select { |dataset| dataset.needs_sync?(current_time) } + end + + def needs_sync?(current_time = Time.current) + return true if last_synced_at.nil? + + cron = Fugit::Cron.parse(sync_schedule) + last_scheduled_time = cron.previous_time(current_time) + + last_synced_at.to_i < last_scheduled_time.seconds + end + + def sync! + data = StatcanFetcher.fetch(statcan_url) + update!(current_data: data, last_synced_at: Time.current) + end + + private + + def valid_cron_expression + return unless sync_schedule.present? + + parsed_cron = Fugit::Cron.parse(sync_schedule) + if parsed_cron.nil? + errors.add(:sync_schedule, "must be a valid cron expression") + end + end +end diff --git a/app/services/statcan_fetcher.rb b/app/services/statcan_fetcher.rb new file mode 100644 index 0000000..6ab47e2 --- /dev/null +++ b/app/services/statcan_fetcher.rb @@ -0,0 +1,21 @@ +require "csv" + +class StatcanFetcher + def self.fetch(url) + response = HTTP + .timeout(connect: 10, read: 60) + .headers("User-Agent" => "BuildCanada/OutcomeTrackerAPI") + .get(url) + + unless response.status.success? + raise "HTTP Error: #{response.status} - #{response.status.reason}" + end + + csv_string = response.body.to_s + + # Remove UTF-8 Byte Order Mark (BOM) if present + csv_string = csv_string.sub(/\A\uFEFF/, "") + + CSV.parse(csv_string, headers: true, liberal_parsing: true, skip_blanks: true).map(&:to_h) + end +end diff --git a/config/environments/test.rb b/config/environments/test.rb index 30cc5cd..5f4c96a 100644 --- a/config/environments/test.rb +++ b/config/environments/test.rb @@ -52,5 +52,5 @@ config.action_controller.raise_on_missing_callback_actions = true # Use inline adapter for Active Job in tests for faster execution - config.active_job.queue_adapter = :inline + config.active_job.queue_adapter = :test end diff --git a/config/initializers/good_job.rb b/config/initializers/good_job.rb index 322bed7..01cee83 100644 --- a/config/initializers/good_job.rb +++ b/config/initializers/good_job.rb @@ -16,6 +16,10 @@ class: "FeedRefresherJob", # name of the job class as a String; must reference an Active Job job class description: "Refreshed feeds and creates new entries", # optional description that appears in Dashboard, enabled_by_default: -> { Rails.env.production? } # Only enable in production, otherwise can be enabled manually through Dashboard + }, + statcan_sync: { + cron: "0 * * * *", # Every hour + class: "StatcanCronJob" } } diff --git a/config/routes.rb b/config/routes.rb index 0d5c6be..9c2b7c1 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -18,6 +18,7 @@ resources :promises, only: [ :index, :show ] resources :evidences, only: [ :index, :show ] resources :builders, only: [ :index, :show ] + resources :statcan_datasets, only: [ :show ] namespace :admin do resources :promises, only: [ :index, :show, :update, :destroy ] diff --git a/db/migrate/20250707155320_create_statcan_datasets.rb b/db/migrate/20250707155320_create_statcan_datasets.rb new file mode 100644 index 0000000..388b7ca --- /dev/null +++ b/db/migrate/20250707155320_create_statcan_datasets.rb @@ -0,0 +1,17 @@ +class CreateStatcanDatasets < ActiveRecord::Migration[8.0] + def change + create_table :statcan_datasets do |t| + t.text :statcan_url, null: false + t.string :name, null: false + t.string :sync_schedule, null: false + t.jsonb :current_data + t.timestamp :last_synced_at + + t.timestamps + end + + add_index :statcan_datasets, :statcan_url, unique: true + add_index :statcan_datasets, :name, unique: true + add_index :statcan_datasets, :last_synced_at + end +end diff --git a/db/schema.rb b/db/schema.rb index 89b6810..0fa7e64 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[8.0].define(version: 2025_06_30_162816) do +ActiveRecord::Schema[8.0].define(version: 2025_07_07_155320) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" @@ -158,6 +158,7 @@ t.string "language" t.string "url" t.jsonb "raw" + t.string "source_url" t.bigint "government_id", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false @@ -365,6 +366,19 @@ t.datetime "updated_at", null: false end + create_table "statcan_datasets", force: :cascade do |t| + t.text "statcan_url", null: false + t.string "name", null: false + t.string "sync_schedule", null: false + t.jsonb "current_data" + t.datetime "last_synced_at", precision: nil + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["last_synced_at"], name: "index_statcan_datasets_on_last_synced_at" + t.index ["name"], name: "index_statcan_datasets_on_name", unique: true + t.index ["statcan_url"], name: "index_statcan_datasets_on_statcan_url", unique: true + end + create_table "tool_calls", force: :cascade do |t| t.bigint "message_id", null: false t.string "tool_call_id" diff --git a/db/seeds/canada.rb b/db/seeds/canada.rb index 722e6f9..697e832 100644 --- a/db/seeds/canada.rb +++ b/db/seeds/canada.rb @@ -637,4 +637,6 @@ puts "Seeding Evidences..." +require_relative 'statcan_datasets' + puts "Done seeding" diff --git a/db/seeds/statcan_datasets.rb b/db/seeds/statcan_datasets.rb new file mode 100644 index 0000000..f4d1f57 --- /dev/null +++ b/db/seeds/statcan_datasets.rb @@ -0,0 +1,61 @@ +puts "Seeding StatcanDatasets..." + +statcan_datasets = [ + { + name: "balance-sheets", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=1010001501&latestN=0&startDate=19901001&endDate=&csvLocale=en&selectedMembers=%5B%5B1%5D%2C%5B2%5D%2C%5B%5D%5D&checkedLevels=2D1%2C2D2%2C2D3", + sync_schedule: "23 6 * * *" # Daily at 6:23 AM + }, + { + name: "demographic-incomes-non-permanent-residents", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=1110009101&latestN=2&startDate=&endDate=&csvLocale=en&selectedMembers=%5B%5B1%5D%2C%5B%5D%2C%5B%5D%2C%5B%5D%2C%5B5%5D%5D&checkedLevels=1D1%2C1D2%2C1D3%2C2D1%2C3D1", + sync_schedule: "23 8 * * *" # Daily at 8:23 AM + }, + { + name: "gdp", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=3610010401&latestN=0&startDate=19610101&endDate=&csvLocale=en&selectedMembers=%5B%5B1%5D%2C%5B1%5D%2C%5B1%5D%2C%5B%5D%5D&checkedLevels=3D1%2C3D2%2C3D3%2C3D4", + sync_schedule: "23 6 * * *" # Daily at 6:23 AM + }, + { + name: "housing-starts", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData.action?pid=3410015101&latestN=0&startDate=19880101&endDate=&csvLocale=en&selectedMembers=%5B%5B%5D%2C%5B1%5D%2C%5B%5D%5D&checkedLevels=0D1%2C2D1%2C2D2", + sync_schedule: "23 6 * * *" # Daily at 6:23 AM + }, + { + name: "labour-productivity", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=3610020701&latestN=0&startDate=19801001&endDate=&csvLocale=en&selectedMembers=%5B%5B1%5D%2C%5B5%5D%2C%5B1%2C2%2C3%2C4%2C5%2C6%2C7%2C8%2C9%2C10%2C11%2C13%2C14%2C15%2C16%2C17%2C18%2C19%2C20%2C21%5D%5D&checkedLevels=", + sync_schedule: "23 9 * * *" # Daily at 9:23 AM + }, + { + name: "non-permanent-residents", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData.action?pid=1710012101&latestN=0&startDate=20210101&endDate=&csvLocale=en&selectedMembers=%5B%5B%5D%2C%5B%5D%5D&checkedLevels=0D1%2C1D1%2C1D2%2C1D3", + sync_schedule: "23 7 * * *" # Daily at 7:23 AM + }, + { + name: "population", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=1710000901&latestN=0&startDate=19000101&endDate=&csvLocale=en&selectedMembers=%5B%5B1%2C2%2C3%2C4%2C5%2C6%2C7%2C8%2C9%2C10%2C11%2C12%2C14%2C15%5D%5D&checkedLevels=", + sync_schedule: "23 6 * * *" # Daily at 6:23 AM + }, + { + name: "primary-energy-production", + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/dtl!downloadDbLoadingData-nonTraduit.action?pid=2510007901&latestN=5&startDate=&endDate=&csvLocale=en&selectedMembers=%5B%5B%5D%2C%5B%5D%2C%5B%5D%5D&checkedLevels=0D1%2C1D1%2C1D2%2C1D3%2C2D1", + sync_schedule: "23 10 * * *" # Daily at 10:23 AM + } +] + +statcan_datasets.each do |dataset_attrs| + dataset = StatcanDataset.find_or_create_by(name: dataset_attrs[:name]) do |d| + d.statcan_url = dataset_attrs[:statcan_url] + d.sync_schedule = dataset_attrs[:sync_schedule] + end + + if dataset.persisted? + if dataset.previously_new_record? + puts "✓ #{dataset.name} - created" + else + puts "✓ #{dataset.name} - already exists" + end + else + puts "✗ #{dataset.name} - failed to create: #{dataset.errors.full_messages.join(', ')}" + end +end diff --git a/test/controllers/statcan_datasets_controller_test.rb b/test/controllers/statcan_datasets_controller_test.rb new file mode 100644 index 0000000..5eff3f3 --- /dev/null +++ b/test/controllers/statcan_datasets_controller_test.rb @@ -0,0 +1,22 @@ +require "test_helper" + +class StatcanDatasetsControllerTest < ActionDispatch::IntegrationTest + test "should show dataset" do + dataset = statcan_datasets(:synced) + + get statcan_dataset_url(dataset) + + assert_response :success + assert_equal "application/json; charset=utf-8", response.content_type + + json_response = JSON.parse(response.body) + assert_equal dataset.id, json_response["id"] + assert_equal dataset.name, json_response["name"] + end + + test "should return 404 for non-existent dataset" do + get statcan_dataset_url(id: 99999) + + assert_response :not_found + end +end diff --git a/test/fixtures/statcan_datasets.yml b/test/fixtures/statcan_datasets.yml new file mode 100644 index 0000000..52c2644 --- /dev/null +++ b/test/fixtures/statcan_datasets.yml @@ -0,0 +1,15 @@ +unsynced: + statcan_url: "https://statcan.gc.ca/123.csv" + name: "test-dataset-unsynced" + sync_schedule: "0 0 * * *" + current_data: null + last_synced_at: null + +synced: + statcan_url: "https://statcan.gc.ca/456.csv" + name: "test-dataset-synced" + sync_schedule: "0 0 * * *" + current_data: + - year: 2020 + population: 38000000 + last_synced_at: "2024-01-15 10:30:00" \ No newline at end of file diff --git a/test/jobs/statcan_cron_job_test.rb b/test/jobs/statcan_cron_job_test.rb new file mode 100644 index 0000000..23d06d6 --- /dev/null +++ b/test/jobs/statcan_cron_job_test.rb @@ -0,0 +1,62 @@ +require "test_helper" + +class StatcanCronJobTest < ActiveJob::TestCase + def setup + # Remove fixture data before each test + StatcanDataset.delete_all + end + + test "should enqueue sync jobs for stale datasets only" do + current_time = Time.parse("2025-01-02 14:00:00") # 2pm + + # Create a stale dataset (never synced) + stale_dataset1 = StatcanDataset.create!( + name: "stale-never-synced", + statcan_url: "https://statcan.gc.ca/stale1.csv", + sync_schedule: "0 0 * * *", + last_synced_at: nil + ) + + # Create another stale dataset (old sync) + stale_dataset2 = StatcanDataset.create!( + name: "stale-old-sync", + statcan_url: "https://statcan.gc.ca/stale2.csv", + sync_schedule: "0 0 * * *", + last_synced_at: Time.parse("2025-01-01 23:00:00") # Yesterday 11pm + ) + + # Create a fresh dataset (recent sync) + _fresh_dataset = StatcanDataset.create!( + name: "fresh-dataset", + statcan_url: "https://statcan.gc.ca/fresh.csv", + sync_schedule: "0 0 * * *", + last_synced_at: Time.parse("2025-01-02 01:00:00") # 1am today + ) + + # Track enqueued jobs + assert_enqueued_jobs 2, only: StatcanSyncJob do + StatcanCronJob.perform_now(current_time) + end + + # Verify the correct jobs were enqueued + assert_enqueued_with(job: StatcanSyncJob, args: [ stale_dataset1 ]) + assert_enqueued_with(job: StatcanSyncJob, args: [ stale_dataset2 ]) + end + + test "should not enqueue jobs when no datasets need syncing" do + current_time = Time.parse("2025-01-02 14:00:00") + + # Create only fresh datasets + StatcanDataset.create!( + name: "fresh-dataset-1", + statcan_url: "https://statcan.gc.ca/fresh1.csv", + sync_schedule: "0 0 * * *", + last_synced_at: Time.parse("2025-01-02 01:00:00") # 1am today + ) + + # Should not enqueue any jobs + assert_enqueued_jobs 0, only: StatcanSyncJob do + StatcanCronJob.perform_now(current_time) + end + end +end diff --git a/test/models/statcan_dataset_test.rb b/test/models/statcan_dataset_test.rb new file mode 100644 index 0000000..f278f0a --- /dev/null +++ b/test/models/statcan_dataset_test.rb @@ -0,0 +1,182 @@ +require "test_helper" + +class StatcanDatasetTest < ActiveSupport::TestCase + def self.valid_attributes + { + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1410028701", + name: "demographic-incomes", + sync_schedule: "0 6 * * *" + } + end + + + test "valid dataset" do + dataset = StatcanDataset.new(self.class.valid_attributes) + + assert dataset.valid? + end + + test "requires statcan_url" do + attributes = self.class.valid_attributes.except(:statcan_url) + dataset = StatcanDataset.new(attributes) + + assert_not dataset.valid? + assert_includes dataset.errors[:statcan_url], "can't be blank" + end + + test "requires name" do + attributes = self.class.valid_attributes.except(:name) + dataset = StatcanDataset.new(attributes) + + assert_not dataset.valid? + assert_includes dataset.errors[:name], "can't be blank" + end + + test "requires sync_schedule" do + attributes = self.class.valid_attributes.except(:sync_schedule) + dataset = StatcanDataset.new(attributes) + + assert_not dataset.valid? + assert_includes dataset.errors[:sync_schedule], "can't be blank" + end + + test "statcan_url must be unique" do + url = "https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1410028701" + + StatcanDataset.create!( + statcan_url: url, + name: "first-dataset", + sync_schedule: "0 6 * * *" + ) + + duplicate = StatcanDataset.new( + statcan_url: url, + name: "second-dataset", + sync_schedule: "0 12 * * *" + ) + + assert_not duplicate.valid? + assert_includes duplicate.errors[:statcan_url], "has already been taken" + end + + test "name must be unique" do + name = "demographic-incomes" + + StatcanDataset.create!( + statcan_url: "https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1410028701", + name: name, + sync_schedule: "0 6 * * *" + ) + + duplicate = StatcanDataset.new( + statcan_url: "https://different-url.statcan.gc.ca/data", + name: name, + sync_schedule: "0 12 * * *" + ) + + assert_not duplicate.valid? + assert_includes duplicate.errors[:name], "has already been taken" + end + + test "name must be kebab-case" do + attributes = self.class.valid_attributes.merge(name: "InvalidName") + dataset = StatcanDataset.new(attributes) + + assert_not dataset.valid? + assert_includes dataset.errors[:name], "must be lowercase with hyphens only" + end + + test "sync_schedule accepts valid cron expressions" do + valid_schedules = [ "0 6 * * *", "30 14 1 * *", "0 * * * 0", "15 9 * * 1-5" ] + + valid_schedules.each do |schedule| + attributes = self.class.valid_attributes.merge(sync_schedule: schedule, name: "test-dataset-#{schedule.hash}") + dataset = StatcanDataset.new(attributes) + + assert dataset.valid?, "#{schedule} should be valid" + end + end + + test "sync_schedule rejects invalid cron expressions" do + invalid_schedules = [ "invalid", "60 25 32 13 8", "not a cron" ] + + invalid_schedules.each do |schedule| + attributes = self.class.valid_attributes.merge(name: "test-dataset-#{schedule.hash}", sync_schedule: schedule) + dataset = StatcanDataset.new(attributes) + + assert_not dataset.valid?, "#{schedule} should be invalid" + assert_includes dataset.errors[:sync_schedule], "must be a valid cron expression" + end + end + + test "needs_sync returns true when last_synced_at is nil" do + attributes = self.class.valid_attributes.merge(last_synced_at: nil) + dataset = StatcanDataset.new(attributes) + current_time = Time.parse("2025-01-02 14:00:00") + + assert dataset.needs_sync?(current_time) + end + + test "needs_sync returns true when last sync was before last scheduled time" do + attributes = self.class.valid_attributes.merge(sync_schedule: "0 0 * * *", last_synced_at: Time.parse("2025-01-01 23:00:00")) + dataset = StatcanDataset.new(attributes) + current_time = Time.parse("2025-01-02 14:00:00") # 2pm next day + + assert dataset.needs_sync?(current_time) + end + + test "needs_sync returns false when last sync was after last scheduled time" do + attributes = self.class.valid_attributes.merge(sync_schedule: "0 0 * * *", last_synced_at: Time.parse("2025-01-02 01:00:00")) + dataset = StatcanDataset.new(attributes) + current_time = Time.parse("2025-01-02 14:00:00") # 2pm same day + + assert_not dataset.needs_sync?(current_time) + end + + test "filter_stale returns datasets that need syncing" do + stale_attributes = self.class.valid_attributes.merge(last_synced_at: nil) + stale_dataset = StatcanDataset.new(stale_attributes) + + fresh_attributes = self.class.valid_attributes.merge(sync_schedule: "0 0 * * *", last_synced_at: Time.parse("2025-01-02 01:00:00")) + fresh_dataset = StatcanDataset.new(fresh_attributes) + + current_time = Time.parse("2025-01-02 14:00:00") # 2pm same day + all_datasets = [ stale_dataset, fresh_dataset ] + stale_datasets = StatcanDataset.filter_stale(all_datasets, current_time) + + assert_includes stale_datasets, stale_dataset + assert_not_includes stale_datasets, fresh_dataset + assert_equal 1, stale_datasets.length + end + + test "filter_stale works with empty collection" do + stale_datasets = StatcanDataset.filter_stale([]) + assert_empty stale_datasets + end + + test "sync! should update dataset with fetched data" do + dataset = statcan_datasets(:unsynced) + parsed_data = [ { "population" => 1000000, "year" => 2023 } ] + + StatcanFetcher.stub :fetch, parsed_data do + dataset.sync! + end + + dataset.reload + assert_equal parsed_data, dataset.current_data + assert_not_nil dataset.last_synced_at + end + + test "sync! should not update dataset when fetch times out" do + dataset = statcan_datasets(:unsynced) + + StatcanFetcher.stub :fetch, ->(url) { raise HTTP::TimeoutError.new("Request timed out") } do + assert_raises HTTP::TimeoutError do + dataset.sync! + end + end + + dataset.reload + assert_nil dataset.current_data + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 0c22470..332896f 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,6 +1,7 @@ ENV["RAILS_ENV"] ||= "test" require_relative "../config/environment" require "rails/test_help" +require "minitest/mock" module ActiveSupport class TestCase