Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions configurations/projector.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"hostName": "NPM Publisher",
"server": {
"port": 3001,
"cors": {
"origin": true,
"credentials": true
}
},
"modules": {
"npmprojector": {
"enabled": false,
"fhirVersion": "r4",
"basePath": "/us-core",
"npm": "hl7.fhir.us.core#7.0.1",
"resourceFolders": ["data"],
"searchParametersFolder": "data",
"debounceMs": 500
}
}
}
5 changes: 5 additions & 0 deletions configurations/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
This folder contains some basic starter configurations:

* Terminology server: see tx-config.json for a vanilla server that doesn't contain any licensed content
* NPM web server: see projector.json for a basic configuration to make a package available online

37 changes: 27 additions & 10 deletions packages/package-crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,29 @@ const {XMLParser} = require('fast-xml-parser');
const crypto = require('crypto');
const fs = require('fs');
const path = require('path');
const {debugLog} = require("../tx/operation-context");

class PackageCrawler {
log;
packages = new Set();

// Build a crawler bound to its configuration, a database handle, and a
// shared stats collector, and tune the connection for concurrent access
// (WAL journaling plus a 5-second busy timeout — the PRAGMA calls imply a
// SQLite-style driver).
constructor(config, db, stats) {
  this.config = config;
  this.db = db;
  this.stats = stats;
  this.totalBytes = 0;         // byte counter for the crawl run — presumably downloaded package data; confirm against usage
  this.crawlerLog = {};        // structured log object; re-initialized at the start of each crawl()
  this.errors = '';            // accumulated error text
  this.abortController = null; // created per crawl(); shutdown() aborts it to cancel in-flight requests
  this.db.run('PRAGMA journal_mode = WAL');
  this.db.run('PRAGMA busy_timeout = 5000');
}

async crawl(log) {
this.log = log;
this.packages.clear();

this.abortController = new AbortController();

const startTime = Date.now();
this.crawlerLog = {
startTime: new Date().toISOString(),
Expand All @@ -54,6 +57,7 @@ class PackageCrawler {

// Process each feed
for (const feedConfig of masterResponse.feeds) {
if (this.abortController?.signal.aborted) break;
if (!feedConfig.url) {
this.log.info('Skipping feed with no URL: '+ feedConfig);
continue;
Expand All @@ -71,6 +75,7 @@ class PackageCrawler {
}
// process simplifier last
for (const feedConfig of masterResponse.feeds) {
if (this.abortController?.signal.aborted) break;
if (!feedConfig.url) {
this.log.info('Skipping feed with no URL: '+ feedConfig);
continue;
Expand Down Expand Up @@ -123,14 +128,15 @@ class PackageCrawler {
} else {
const response = await axios.get(url, {
timeout: 30000,
signal: this.abortController?.signal,
headers: {
'User-Agent': 'FHIR Package Crawler/1.0'
}
});
return response.data;
}
} catch (error) {
console.log(error);
debugLog(error);
if (error.response && error.response.status === 429) {
throw new Error(`RATE_LIMITED: Server returned 429 Too Many Requests for ${url}`);
}
Expand All @@ -151,6 +157,7 @@ class PackageCrawler {
} else {
const response = await axios.get(url, {
timeout: 30000,
signal: this.abortController?.signal,
headers: {
'User-Agent': 'FHIR Package Crawler/1.0'
}
Expand All @@ -165,6 +172,7 @@ class PackageCrawler {
return parser.parse(response.data);
}
} catch (error) {
debugLog(error);
if (error.response && error.response.status === 429) {
throw new Error(`RATE_LIMITED: Server returned 429 Too Many Requests for ${url}`);
}
Expand All @@ -182,6 +190,7 @@ class PackageCrawler {
const response = await axios.get(url, {
timeout: 60000,
responseType: 'arraybuffer',
signal: this.abortController?.signal,
headers: {
'User-Agent': 'FHIR Package Crawler/1.0'
}
Expand All @@ -191,6 +200,7 @@ class PackageCrawler {
return Buffer.from(response.data);
}
} catch (error) {
debugLog(error);
if (error.response && error.response.status === 429) {
throw new Error(`RATE_LIMITED: Server returned 429 Too Many Requests for ${url}`);
}
Expand Down Expand Up @@ -222,6 +232,7 @@ class PackageCrawler {
this.log.info(`Found ${items.length} items in feed`);

for (let i = 0; i < items.length; i++) {
if (this.abortController?.signal.aborted) break;
try {
await this.updateItem(url, items[i], i, packageRestrictions, feedLog);
} catch (itemError) {
Expand All @@ -244,7 +255,7 @@ class PackageCrawler {
}

} catch (error) {
console.log(error);
debugLog(error);
// Check if this is a 429 error on feed fetch
if (error.message.includes('RATE_LIMITED')) {
this.log.info(`Rate limited while fetching feed ${url}, skipping this feed`);
Expand Down Expand Up @@ -302,7 +313,7 @@ class PackageCrawler {
}

// Check package restrictions
if (!this.isPackageAllowed(id, source, packageRestrictions)) {
if (!this.isPackageAllowed(id, source, packageRestrictions).allowed) {
if (!source.includes('simplifier.net')) {
const error = `The package ${id} is not allowed to come from ${source}`;
this.log.info(error);
Expand All @@ -329,11 +340,12 @@ class PackageCrawler {

// Parse publication date
let pubDate;
let pd;
try {
let pd = item.pubDate;
pubDate = this.parsePubDate(pd);
} catch (error) {
itemLog.error = `Invalid date format '{pd}': ${error.message}`;
itemLog.error = `Invalid date format '${pd}': ${error.message}`;
itemLog.status = 'error';
return;
}
Expand All @@ -355,7 +367,7 @@ class PackageCrawler {
itemLog.status = 'Fetched';

} catch (error) {
this.log.error(`Exception processing item ${itemLog.guid || index}:`+ error.message);
this.log.error(`Exception processing item ${itemLog.guid || index} from ${source}: `+ error.message);
itemLog.status = 'Exception';
itemLog.error = error.message;
if (error.message.includes('RATE_LIMITED')) {
Expand Down Expand Up @@ -383,7 +395,7 @@ class PackageCrawler {

if (this.matchesPattern(fixedPackageId, fixedMask)) {
// This package matches a restriction - check if source is allowed
const allowedFeeds = restriction.feeds.map(feed => feed);
const allowedFeeds = restriction.feeds.map(feed => fixUrl(feed));
const feedList = allowedFeeds.join(', ');

for (const allowedFeed of restriction.feeds) {
Expand Down Expand Up @@ -500,7 +512,7 @@ class PackageCrawler {
await this.commit(packageBuffer, npmPackage, date, guid, id, version, canonical, urls);

} catch (error) {
console.log(error);
debugLog(error);
this.log.error(`Error storing package ${guid}:`+ error.message);
throw error;
}
Expand Down Expand Up @@ -562,6 +574,7 @@ class PackageCrawler {
throw new Error('package.json not found in extracted package');
}

const packageJson = JSON.parse(files['package.json']);
const hasInstallScripts = !!(
packageJson.scripts && (
packageJson.scripts.preinstall ||
Expand All @@ -570,7 +583,6 @@ class PackageCrawler {
)
);
const hasJavaScript = Object.keys(files).some(f => f.endsWith('.js') || f.endsWith('.mjs') || f.endsWith('.cjs'));
const packageJson = JSON.parse(files['package.json']);

// Extract basic NPM fields
const id = packageJson.name || '';
Expand Down Expand Up @@ -921,6 +933,11 @@ class PackageCrawler {
return id;
}
}
shutdown() {
if (this.abortController) {
this.abortController.abort();
}
}
}

module.exports = PackageCrawler;
1 change: 1 addition & 0 deletions packages/packages.js
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ class PackagesModule {

stopCrawlerJob() {
if (this.crawlerJob) {
this.crawler.shutdown();
this.crawlerJob.stop();
this.crawlerJob = null;
pckLog.info('Package crawler job stopped');
Expand Down
32 changes: 19 additions & 13 deletions registry/crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const {
ServerVersionInformation,
} = require('./model');
const {Extensions} = require("../tx/library/extensions");
const {debugLog} = require("../tx/operation-context");

const MASTER_URL = 'https://fhir.github.io/ig-registry/tx-servers.json';

Expand Down Expand Up @@ -89,7 +90,7 @@ class RegistryCrawler {
// Update the current data
this.currentData = newData;
} catch (error) {
console.log(error.message);
debugLog(error);
this.addLogEntry('error', 'Exception Scanning:', error);
this.currentData.outcome = `Error: ${error.message}`;
this.errors.push({
Expand Down Expand Up @@ -121,7 +122,7 @@ class RegistryCrawler {
}

if (!registry.address) {
this.addLogEntry('error', `No url provided for ${registry.name, registry.name}`, '');
this.addLogEntry('error', `No url provided for ${registry.name}`, '');
return registry;
}

Expand All @@ -145,7 +146,7 @@ class RegistryCrawler {
}

} catch (error) {
console.log(error.message);
debugLog(error);
registry.error = error.message;
this.addLogEntry('error', `Exception processing registry ${registry.name}: ${error.message}`, registry.address);
}
Expand Down Expand Up @@ -236,7 +237,7 @@ class RegistryCrawler {
this.addLogEntry('info', ` Server ${version.address}: ${version.lastTat} for ${version.codeSystems.length} CodeSystems and ${version.valueSets.length} ValueSets`);

} catch (error) {
console.log(error.message);
debugLog(error);
const elapsed = Date.now() - startTime;
this.addLogEntry('error', `Server ${version.address}: Error after ${elapsed}ms: ${error.message}`);
version.error = error.message;
Expand Down Expand Up @@ -281,8 +282,8 @@ class RegistryCrawler {
});
}
} catch (error) {
console.log(error.message);
this.addLogEntry('error', `Could not fetch terminology capabilities: ${error.message}`);
debugLog(error);
this.addLogEntry('error', `Could not fetch terminology capabilities from ${version.address}: ${error.message}`);
}

if (this.abortController?.signal.aborted) return;
Expand Down Expand Up @@ -330,8 +331,8 @@ class RegistryCrawler {
});
}
} catch (error) {
console.log(error.message);
this.addLogEntry('error', `Could not fetch terminology capabilities: ${error.message}`);
debugLog(error);
this.addLogEntry('error', `Could not fetch terminology capabilities from ${version.address}: ${error.message}`);
}

// Search for value sets
Expand All @@ -348,14 +349,19 @@ class RegistryCrawler {
*/
async fetchValueSets(version, server, exclusions) {
// Initial search URL
let count = 0;
let searchUrl = `${version.address}/ValueSet?_elements=url,version`+(version.address.includes("fhir.org") ? "&_count=200" : "");
try {
// Set of URLs to avoid duplicates
const valueSetUrls = new Set();


// Continue fetching while we have a URL
while (searchUrl) {
count++;
if (count == 1000) {
throw new Error(`Fetch ValueSet loop exceeded 1000 iterations - a logic problem on the server? (${version.address})`);
}

if (this.abortController?.signal.aborted) break;
this.log.debug(`Fetching value sets from ${searchUrl}`);
const bundle = await this.fetchJson(searchUrl, server.code);
Expand Down Expand Up @@ -389,7 +395,7 @@ class RegistryCrawler {
version.valueSets = Array.from(valueSetUrls).sort();

} catch (error) {
console.log(error.message);
debugLog(error);
this.addLogEntry('error', `Could not fetch value sets: ${error.message} from ${searchUrl}`);
}
}
Expand Down Expand Up @@ -467,7 +473,7 @@ class RegistryCrawler {
return response.data;

} catch (error) {
console.log(error.message);
debugLog(error);
if (error.response) {
throw new Error(`HTTP ${error.response.status}: ${error.response.statusText}`);
} else if (error.request) {
Expand Down Expand Up @@ -611,14 +617,14 @@ class RegistryCrawler {
* @param {string} level - Filter by log level
* @returns {Array} Array of log entries
*/
getLogs(limit = 100)
getLogs(limit = 100, level = null)
{
if (!this.logs) {
return [];
}

// Filter by level if specified
let filteredLogs = this.logs;
let filteredLogs = level ? this.logs.filter(entry => entry.level === level) : this.logs;

// Get the latest entries up to the limit
return filteredLogs.slice(-limit);
Expand Down
4 changes: 2 additions & 2 deletions tests/tx/library.test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
const {Library} = require("../../tx/library");
const path = require("path");
const {OperationContext} = require("../../tx/operation-context");
const {OperationContext, debugLog} = require("../../tx/operation-context");
const {Languages} = require("../../library/languages");

const NO_LOAD_TEST = true;
Expand Down Expand Up @@ -69,7 +69,7 @@ describe('Provider Test', () => {
}

} catch (error) {
console.log(error);
debugLog(error);
failureCount++;
failures.push(`${key}: ${error.message}`);
console.log(`✗ Error creating provider for ${key}: ${error.message}`);
Expand Down
9 changes: 8 additions & 1 deletion tx/operation-context.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
);
}

/**
 * Log an error to the console, but only when debugging is enabled
 * via isDebugging().
 *
 * @param {Error|*} error - The error (or value) to log.
 * @param {string} [message] - Optional extra context to log alongside it.
 */
function debugLog(error, message) {
  if (isDebugging()) {
    // Most call sites invoke debugLog(error) with a single argument;
    // passing `undefined` through to console.log would print a spurious
    // trailing "undefined", so only forward the message when supplied.
    if (message === undefined) {
      console.log(error);
    } else {
      console.log(error, message);
    }
  }
}


class TimeTracker {
constructor() {
Expand Down Expand Up @@ -584,5 +590,6 @@
TimeTracker,
ResourceCache,
ExpansionCache,
isDebugging
isDebugging,
debugLog
};
Loading
Loading