diff --git a/README.md b/README.md index a63fbf25..211ef6a0 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,6 @@ By offering sanitization, detection of harmful language, prevention of data leak **Examples**: -- Get started with [ChatGPT and Tueri](./examples/openai_api.py). - Deploy Tueri as [API](./docs/api/overview.md) ## Supported scanners @@ -25,41 +24,33 @@ By offering sanitization, detection of harmful language, prevention of data leak ### Prompt scanners - [Anonymize](./docs/input_scanners/anonymize.md) -- [BanCode](./docs/input_scanners/ban_code.md) - [BanCompetitors](./docs/input_scanners/ban_competitors.md) - [BanSubstrings](./docs/input_scanners/ban_substrings.md) - [BanTopics](./docs/input_scanners/ban_topics.md) -- [Code](./docs/input_scanners/code.md) -- [Gibberish](./docs/input_scanners/gibberish.md) - [InvisibleText](./docs/input_scanners/invisible_text.md) - [Language](./docs/input_scanners/language.md) +- [MaskCode](./docs/input_scanners/mask_code.md) - [PromptInjection](./docs/input_scanners/prompt_injection.md) - [Regex](./docs/input_scanners/regex.md) - [Secrets](./docs/input_scanners/secrets.md) - [Sentiment](./docs/input_scanners/sentiment.md) - [TokenLimit](./docs/input_scanners/token_limit.md) -- [Toxicity](./docs/input_scanners/toxicity.md) ### Output scanners -- [BanCode](./docs/output_scanners/ban_code.md) +- [BadURL](./docs/output_scanners/bad_url.md) - [BanCompetitors](./docs/output_scanners/ban_competitors.md) - [BanSubstrings](./docs/output_scanners/ban_substrings.md) - [BanTopics](./docs/output_scanners/ban_topics.md) - [Bias](./docs/output_scanners/bias.md) -- [Code](./docs/output_scanners/code.md) - [Deanonymize](./docs/output_scanners/deanonymize.md) +- [FactualConsistency](./docs/output_scanners/factual_consistency.md) - [JSON](./docs/output_scanners/json.md) - [Language](./docs/output_scanners/language.md) - [LanguageSame](./docs/output_scanners/language_same.md) -- [MaliciousURLs](./docs/output_scanners/malicious_urls.md) 
+- [MaskCode](./docs/input_scanners/mask_code.md) - [NoRefusal](./docs/output_scanners/no_refusal.md) -- [ReadingTime](./docs/output_scanners/reading_time.md) -- [FactualConsistency](./docs/output_scanners/factual_consistency.md) -- [Gibberish](./docs/output_scanners/gibberish.md) - [Regex](./docs/output_scanners/regex.md) - [Relevance](./docs/output_scanners/relevance.md) - [Sensitive](./docs/output_scanners/sensitive.md) - [Sentiment](./docs/output_scanners/sentiment.md) -- [Toxicity](./docs/output_scanners/toxicity.md) -- [URLReachability](./docs/output_scanners/url_reachability.md) diff --git a/docs/input_scanners/mask_code.md b/docs/input_scanners/mask_code.md new file mode 100644 index 00000000..609abe86 --- /dev/null +++ b/docs/input_scanners/mask_code.md @@ -0,0 +1,3 @@ +# Mask Code Scanner + +The `MaskCode` scanner is designed to detect and mask code in the prompt. It also supports programming language identification. \ No newline at end of file diff --git a/docs/output_scanners/bad_url.md b/docs/output_scanners/bad_url.md new file mode 100644 index 00000000..aa37a3ce --- /dev/null +++ b/docs/output_scanners/bad_url.md @@ -0,0 +1,6 @@ +# Bad URL Scanner + +This scanner identifies URLs in the text and checks them for accessibility, ensuring that all URLs are reachable and not broken. +If any URLs are reachable, they are checked for malicious content. + +All URLs are masked appropriately if any such issues are detected. \ No newline at end of file diff --git a/tueri/output_scanners/bad_url.py b/tueri/output_scanners/bad_url.py index 9f64eb18..0053766d 100644 --- a/tueri/output_scanners/bad_url.py +++ b/tueri/output_scanners/bad_url.py @@ -43,7 +43,7 @@ def __init__( self, *, model: Model | None = None, - malicious_threshold: float = 0.5, + threshold: float = 0.5, use_onnx: bool = False, success_status_codes: list[int] | None = None, timeout: int = 5, @@ -51,12 +51,12 @@ """ Parameters: model: The model to use for malicious URL detection.
- malicious_threshold: The threshold used to determine if the URL is malicious. + threshold: The threshold used to determine if the URL is malicious. use_onnx: Whether to use the ONNX version of the model. success_status_codes: A list of status codes that are considered as successful. timeout: The timeout in seconds for the HTTP requests. """ - self._malicious_threshold = malicious_threshold + self._malicious_threshold = threshold self._timeout = timeout if success_status_codes is None: diff --git a/tueri_api/config/scanners.yml b/tueri_api/config/scanners.yml index dcee77c0..c938113c 100644 --- a/tueri_api/config/scanners.yml +++ b/tueri_api/config/scanners.yml @@ -5,7 +5,7 @@ app: scan_fail_fast: ${SCAN_FAIL_FAST:false} scan_prompt_timeout: ${SCAN_PROMPT_TIMEOUT:30} scan_output_timeout: ${SCAN_OUTPUT_TIMEOUT:30} - lazy_load: ${LAZY_LOAD:true} # TODO: Can be changed to false to allow loading models at startup (speed optimization), but runs into errors causing container to restart + lazy_load: ${LAZY_LOAD:false} rate_limit: enabled: ${RATE_LIMIT_ENABLED:false} diff --git a/tueri_api/entrypoint.sh b/tueri_api/entrypoint.sh index 313b9102..5b6623bd 100755 --- a/tueri_api/entrypoint.sh +++ b/tueri_api/entrypoint.sh @@ -4,4 +4,4 @@ APP_WORKERS=${APP_WORKERS:-1} CONFIG_FILE=${CONFIG_FILE:-./config/scanners.yml} # Uvicorn with workers -uvicorn app.app:create_app --host=0.0.0.0 --port=8000 --workers="$APP_WORKERS" --forwarded-allow-ips="*" --proxy-headers --timeout-keep-alive="2" +uvicorn app.app:create_app --host=0.0.0.0 --port=8000 --factory --workers="$APP_WORKERS" --forwarded-allow-ips="*" --proxy-headers --timeout-keep-alive="2"