Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/code-analyzer-regex-engine/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@salesforce/code-analyzer-regex-engine",
"description": "Plugin package that adds 'regex' as an engine into Salesforce Code Analyzer",
"version": "0.33.1-SNAPSHOT",
"version": "0.34.0-SNAPSHOT",
"author": "The Salesforce Code Analyzer Team",
"license": "BSD-3-Clause",
"homepage": "https://developer.salesforce.com/docs/platform/salesforce-code-analyzer/overview",
Expand Down
10 changes: 10 additions & 0 deletions packages/code-analyzer-regex-engine/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ export type RegexRule = {
// The regular expression that triggers a violation when matched against the contents of a file.
regex: string;

// [Optional] A negative (exclusion) pattern. Each match of `regex` is tested against this pattern,
// and the match is not reported as a violation if this pattern matches anywhere within the matched text.
// Use it to suppress known false positives without complicating the main `regex`.
// Example: regex: /(To|From):\s*\$\([^)]+\)/ with regex_ignore: /\$\(validatedMessageId\)/
regex_ignore?: string;

// The extensions of the files that you would like to test the regular expression against.
// If not defined, or equal to null, then all text-based files of any file extension will be tested.
file_extensions?: string[];
Expand Down Expand Up @@ -74,6 +79,10 @@ export function validateAndNormalizeConfig(valueExtractor: ConfigValueExtractor)
const description: string = ruleExtractor.extractRequiredString('description');
const rawRegexString: string = ruleExtractor.extractRequiredString('regex');
const regexString: string = validateRegexString(rawRegexString, ruleExtractor.getFieldPath('regex'));
const rawPatternNotRegex: string | undefined = ruleExtractor.extractString('regex_ignore');
const patternNotRegexString: string | undefined = rawPatternNotRegex
? validateRegexString(rawPatternNotRegex, ruleExtractor.getFieldPath('regex_ignore'))
: undefined;
const rawFileExtensions: string[] | undefined = ruleExtractor.extractArray('file_extensions',
(element, fieldPath) => ValueValidator.validateString(element, fieldPath, FILE_EXT_PATTERN));

Expand All @@ -85,6 +94,7 @@ export function validateAndNormalizeConfig(valueExtractor: ConfigValueExtractor)
severity: ruleExtractor.extractSeverityLevel('severity', DEFAULT_SEVERITY_LEVEL)!,
tags: ruleExtractor.extractArray('tags', ValueValidator.validateString, [COMMON_TAGS.RECOMMENDED, COMMON_TAGS.CUSTOM])!,
...(rawFileExtensions ? { file_extensions: normalizeFileExtensions(rawFileExtensions) } : {}),
...(patternNotRegexString ? { regex_ignore: patternNotRegexString } : {}),
}
}
return {
Expand Down
17 changes: 17 additions & 0 deletions packages/code-analyzer-regex-engine/src/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ export class RegexEngine extends Engine {
const contextuallyDerivedEol: string = contextuallyDeriveEolString(fileContents);
const newlineIndexes: number[] = getNewlineIndices(fileContents, contextuallyDerivedEol);

// Compile the rule's optional exclusion pattern (regex_ignore); stays undefined when the rule has none.
const ignorePatternSource = this.regexRules[ruleName].regex_ignore;
const patternNotRegex = ignorePatternSource ? convertToRegex(ignorePatternSource) : undefined;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if an invalid regex is passed? Does that validation happen before this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

**Yes, validation happens before this code executes.**

In config.ts (lines 82-84), regex_ignore is validated during configuration parsing:

const rawPatternNotRegex: string | undefined = ruleExtractor.extractString('regex_ignore');
const patternNotRegexString: string | undefined = rawPatternNotRegex
? validateRegexString(rawPatternNotRegex, ruleExtractor.getFieldPath('regex_ignore'))
: undefined;

The validateRegexString() function tests if the pattern is a valid regex and throws a descriptive error if it's invalid. This happens during config loading, before any engine execution.

So by the time engine.ts calls convertToRegex(), the regex pattern has already been validated and is guaranteed to be valid (or the engine wouldn't have started).

Flow:

  1. Config loaded → config.ts validates regex_ignore pattern
  2. Invalid regex → Error thrown, engine doesn't start
  3. Valid regex → Engine starts, engine.ts safely calls convertToRegex()

for (const match of fileContents.matchAll(regex)) {
let startIndex: number = match.index;
let matchLength: number = match[0].length;
Expand All @@ -150,6 +155,18 @@ export class RegexEngine extends Engine {
matchLength = match.groups.target.length;
}

// Drop this match when the reported text also satisfies the rule's exclusion pattern.
if (patternNotRegex) {
    const candidateText = fileContents.substring(startIndex, startIndex + matchLength);
    const isIgnored: boolean = patternNotRegex.test(candidateText);
    // A regex with the global/sticky flag remembers where test() stopped; rewind it so the
    // next match is always tested from the beginning of its own text.
    patternNotRegex.lastIndex = 0;
    if (isIgnored) {
        continue;
    }
}

const startLine: number = getLineNumber(startIndex, newlineIndexes);
const startColumn: number = getColumnNumber(startIndex, newlineIndexes, contextuallyDerivedEol);
const endLine: number = getLineNumber(startIndex + matchLength, newlineIndexes);
Expand Down
12 changes: 12 additions & 0 deletions packages/code-analyzer-regex-engine/src/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ const MESSAGE_CATALOG : { [key: string]: string } = {
` {rule_name} is the name you would like to give to your custom rule\n` +
` {rule_property_name} is the name of one of the rule properties. You may specify the following rule properties:\n` +
` 'regex' - The regular expression that triggers a violation when matched against the contents of a file.\n` +
` 'regex_ignore' - [Optional] The negative pattern - matches that also match this pattern will be excluded.\n` +
` This allows you to exclude false positives by specifying patterns that should NOT be flagged.\n` +
` Example: To match email headers with user input but exclude 'validatedMessageId':\n` +
` regex: /(To|From):\\s*\\$\\([^)]+\\)/gi\n` +
` regex_ignore: /\\$\\(\\s*validatedMessageId\\s*\\)/gi\n` +
` 'file_extensions' - The extensions of the files that you would like to test the regular expression against.\n` +
` 'description' - A description of the rule's purpose\n` +
` 'violation_message' - [Optional] The message emitted when a rule violation occurs.\n` +
Expand All @@ -32,6 +37,13 @@ const MESSAGE_CATALOG : { [key: string]: string } = {
` violation_message: "A comment with a TODO statement was found. Please remove TODO statements from your apex code."\n` +
` severity: "Info"\n` +
` tags: ["TechDebt"]\n` +
` "DataWeaveEmailHeaderInjection":\n` +
` regex: /(To|From|Subject):\\s*\\$\\([^)]+\\)/gi\n` +
` regex_ignore: /\\$\\(\\s*validatedMessageId\\s*\\)/gi\n` +
` file_extensions: [".dwl"]\n` +
` description: "Detects user input in email headers, excluding validated IDs."\n` +
` severity: "Critical"\n` +
` tags: ["Security"]\n` +
`-------------------------------------------`,

UnsupportedEngineName:
Expand Down
83 changes: 83 additions & 0 deletions packages/code-analyzer-regex-engine/test/engine.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,89 @@ describe('Tests for runRules', () => {
});
});

describe('Tests for regex_ignore', () => {
    // Fixture files under test-data/patternNotRegex:
    //  - VALIDATED_ID_FILE only interpolates $(validatedMessageId) into headers
    //  - UNSANITIZED_FILE interpolates $(payload.x) values into headers
    const VALIDATED_ID_FILE = 'emailHeaders_WithValidatedId.dwl';
    const UNSANITIZED_FILE = 'emailHeaders_WithUnsanitizedPayload.dwl';

    /**
     * Runs the "EmailHeaderInjection" rule from the given rule set against the patternNotRegex
     * fixture folder and returns the resulting violations grouped by the fixture file they point at.
     *
     * @param rules rule set that must contain a rule named "EmailHeaderInjection"
     * @returns the violations whose primary code location is in each of the two fixture files
     */
    async function runEmailHeaderRule(rules: RegexRules): Promise<{
        validatedId: EngineRunResults['violations'],
        unsanitized: EngineRunResults['violations']
    }> {
        const testEngine = new RegexEngine(rules, RULE_RESOURCE_URLS);
        const runOptions: RunOptions = createRunOptions(
            new Workspace('id', [path.resolve(__dirname, "test-data", "patternNotRegex")]));
        const runResults: EngineRunResults = await testEngine.runRules(["EmailHeaderInjection"], runOptions);

        const violationsIn = (fileName: string): EngineRunResults['violations'] =>
            runResults.violations.filter(v => v.codeLocations[0].file.includes(fileName));
        return {
            validatedId: violationsIn(VALIDATED_ID_FILE),
            unsanitized: violationsIn(UNSANITIZED_FILE)
        };
    }

    it('regex_ignore should exclude matches that match the negative pattern', async () => {
        const customRulesWithNegativePattern: RegexRules = {
            EmailHeaderInjection: {
                regex: '/(To|From|Subject|In-Reply-To|References):\\s*\\$\\([^)]+\\)/gi',
                regex_ignore: '/\\$\\(\\s*validatedMessageId\\s*\\)/gi',
                description: "Detects user input in email headers, excluding validatedMessageId",
                file_extensions: [".dwl"],
                violation_message: "User input detected in email header",
                severity: SeverityLevel.Critical,
                tags: ["Security"]
            }
        };

        const violations = await runEmailHeaderRule(customRulesWithNegativePattern);

        expect(violations.validatedId).toHaveLength(0); // Should be excluded by regex_ignore
        expect(violations.unsanitized.length).toBeGreaterThan(0); // Should have violations
    });

    it('Rule without regex_ignore should behave normally', async () => {
        const customRulesWithoutNegativePattern: RegexRules = {
            EmailHeaderInjection: {
                regex: '/(To|From|Subject|In-Reply-To|References):\\s*\\$\\([^)]+\\)/gi',
                description: "Detects user input in email headers",
                file_extensions: [".dwl"],
                violation_message: "User input detected in email header",
                severity: SeverityLevel.Critical,
                tags: ["Security"]
            }
        };

        const violations = await runEmailHeaderRule(customRulesWithoutNegativePattern);

        // Without regex_ignore, both files should have violations
        expect(violations.validatedId.length).toBeGreaterThan(0);
        expect(violations.unsanitized.length).toBeGreaterThan(0);
    });

    it('regex_ignore with multiple exclusion patterns', async () => {
        const customRulesWithMultipleExclusions: RegexRules = {
            EmailHeaderInjection: {
                // The header list must include "References": it is the only header in the validated-id
                // fixture that the primary regex can match. With a narrower list the primary regex
                // matches nothing in that file and the exclusion assertion below passes vacuously.
                regex: '/(To|From|Subject|In-Reply-To|References):\\s*\\$\\([^)]+\\)/gi',
                regex_ignore: '/\\$\\((validatedMessageId|sanitizeHeader|getSafeEmailHeader)\\s*[^)]*\\)/gi',
                description: "Detects user input in email headers, excluding safe functions",
                file_extensions: [".dwl"],
                violation_message: "User input detected in email header",
                severity: SeverityLevel.Critical,
                tags: ["Security"]
            }
        };

        const violations = await runEmailHeaderRule(customRulesWithMultipleExclusions);

        // validatedMessageId should still be excluded by the alternation ...
        expect(violations.validatedId).toHaveLength(0);
        // ... while values that match none of the alternatives must still be reported.
        expect(violations.unsanitized.length).toBeGreaterThan(0);
    });
});

describe('Tests for getEngineVersion', () => {
it('Outputs something resembling a Semantic Version', async () => {
const version: string = await engine.getEngineVersion();
Expand Down
34 changes: 34 additions & 0 deletions packages/code-analyzer-regex-engine/test/plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -498,4 +498,38 @@ describe('RegexEnginePlugin Custom Config Tests', () => {
await expect(plugin.createEngineConfig("regex", valueExtractor)).rejects.toThrow(
getMessageFromCatalog(SHARED_MESSAGE_CATALOG,'ConfigValueMustBeOfType', 'engines.regex.custom_rules.NoTodos.tags', 'array', 'string'));
});

// Verifies that a user-supplied regex_ignore value is still present on the engine's rule
// after going through createEngineConfig and createEngine.
it("If user creates a rule with regex_ignore, it should be included in the config", async () => {
    const userConfig = {
        custom_rules: {
            "EmailHeaderInjection": {
                regex: String.raw`/(To|From|Subject):\s*\$\([^)]+\)/gi`,
                regex_ignore: String.raw`/\$\(\s*validatedMessageId\s*\)/gi`,
                description: "Detects user input in email headers",
                file_extensions: [".dwl"]
            }
        }
    };

    const extractor: ConfigValueExtractor = new ConfigValueExtractor(userConfig, 'engines.regex');
    const engineConfig: ConfigObject = await plugin.createEngineConfig("regex", extractor);
    const regexEngine: RegexEngine = await plugin.createEngine("regex", engineConfig) as RegexEngine;

    // Inspect the rule as the engine sees it, after config resolution.
    const resolvedIgnorePattern = regexEngine._getRegexRules()["EmailHeaderInjection"].regex_ignore;
    expect(resolvedIgnorePattern).toBeDefined();
    expect(resolvedIgnorePattern).toContain('validatedMessageId');
});

// Verifies that a syntactically invalid regex_ignore makes createEngineConfig reject with an
// error that names the regex_ignore field path and includes the regex parse failure.
it("If user creates a rule with invalid regex_ignore, ensure correct error is emitted", async () => {
    const invalidPattern = "/bad[pattern/gi"; // unterminated character class
    const userConfig = {
        custom_rules: {
            "BadRule": {
                ...SAMPLE_RAW_CUSTOM_RULE_DEFINITION,
                regex_ignore: invalidPattern
            }
        }
    };
    const expectedError = getMessage(
        'InvalidConfigurationValueWithReason',
        'engines.regex.custom_rules.BadRule.regex_ignore',
        getMessage('InvalidRegexDueToError', invalidPattern,
            "Invalid regular expression: /bad[pattern/gi: Unterminated character class"));

    const extractor: ConfigValueExtractor = new ConfigValueExtractor(userConfig, 'engines.regex');
    await expect(plugin.createEngineConfig("regex", extractor)).rejects.toThrow(expectedError);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
%dw 2.0
output application/java
---
{
headers: {
To: $(payload.recipientEmail),
Subject: $(payload.subject),
From: $(payload.fromAddress)
},
body: payload.message
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
%dw 2.0
output application/java
---
{
headers: {
To: "customer@example.com",
Subject: "Property Update",
"In-Reply-To": $(validatedMessageId),
References: $(validatedMessageId),
From: "noreply@dreamhouse.com"
},
body: payload.message
}
Loading