Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion intezer_sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.21.11'
__version__ = '1.21.12'
19 changes: 18 additions & 1 deletion intezer_sdk/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Optional
from typing import Union
from typing import List
from urllib.parse import urlparse

import requests
from requests import Response
Expand Down Expand Up @@ -335,6 +336,17 @@ def get_file_analysis_by_id(analysis_id: str, api: IntezerApi = None) -> Optiona
def get_analysis_by_id(analysis_id: str, api: IntezerApi = None) -> Optional[FileAnalysis]:
    """Fetch a file analysis by its ID.

    This is a thin wrapper: the lookup is delegated unchanged to
    ``get_file_analysis_by_id``.

    Args:
        analysis_id: Identifier of the analysis to look up.
        api: API object forwarded as-is to ``get_file_analysis_by_id``.

    Returns:
        Whatever ``get_file_analysis_by_id`` returns for the same arguments.
    """
    file_analysis = get_file_analysis_by_id(analysis_id, api)
    return file_analysis


def _get_domain(url: str) -> str:
if not url.startswith(('http://', 'https://')):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ignore case when checking for the scheme prefix (e.g. `HTTP://` should be recognized as well).

url = 'http://' + url
return urlparse(url).netloc
Comment on lines +340 to +343
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Add a docstring and improve error handling.

The function already has type hints; it should also include a docstring and handle invalid URLs gracefully.

-def _get_domain(url: str) -> str:
+def _get_domain(url: str) -> str:
+    """Extract domain from URL, prefixing with http:// if protocol is missing.
+
+    Args:
+        url: URL string to parse
+
+    Returns:
+        str: Domain extracted from URL
+
+    Raises:
+        ValueError: If URL is invalid
+    """
     if not url.startswith(('http://', 'https://')):
         url = 'http://' + url
-    return urlparse(url).netloc
+    try:
+        return urlparse(url).netloc
+    except Exception as e:
+        raise ValueError(f"Invalid URL format: {url}") from e
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def _get_domain(url: str) -> str:
if not url.startswith(('http://', 'https://')):
url = 'http://' + url
return urlparse(url).netloc
def _get_domain(url: str) -> str:
"""Extract domain from URL, prefixing with http:// if protocol is missing.
Args:
url: URL string to parse
Returns:
str: Domain extracted from URL
Raises:
ValueError: If URL is invalid
"""
if not url.startswith(('http://', 'https://')):
url = 'http://' + url
try:
return urlparse(url).netloc
except Exception as e:
raise ValueError(f"Invalid URL format: {url}") from e



def _domain_contains(url: str, search_domain: str) -> bool:
    """Check whether ``search_domain`` appears in the domain part of ``url``.

    Only the network location returned by ``_get_domain`` is inspected, so an
    occurrence of ``search_domain`` in the path or query string of ``url``
    does not count. The comparison ignores case because host names are
    case-insensitive.

    Args:
        url: URL to inspect, with or without a scheme.
        search_domain: Domain (or fragment of one) to look for.

    Returns:
        True if ``search_domain`` is a substring of the URL's domain.
    """
    # Lower-case before parsing so that a scheme written in upper case is
    # still recognized by the prefix check inside _get_domain.
    return search_domain.lower() in _get_domain(url.lower())


def _clean_url(url: str) -> str:
"""
Remove http:// or https:// or www. from the beginning of the URL,
Expand Down Expand Up @@ -407,7 +419,12 @@ def from_latest_analysis(cls,
url=url,
aggregated_view=True,
api=api)
analyses_ids = [report['analysis_id'] for report in analysis_history_url_result.all()]

analyses_ids = [
report['analysis_id'] for report in analysis_history_url_result.all()
if _domain_contains(report['submitted_url'], url)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's talk. I'm not sure I understand why we need the fix here and not in the actual search.

or _domain_contains(report['scanned_url'], url)
]

if not analyses_ids:
return None
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/test_url_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from intezer_sdk import consts
from intezer_sdk import errors
from intezer_sdk.analysis import UrlAnalysis
from intezer_sdk.analysis import _domain_contains
from intezer_sdk.api import get_global_api
from intezer_sdk.consts import OnPremiseVersion
from tests.unit.base_test import BaseTest
Expand Down Expand Up @@ -262,3 +263,19 @@ def test_get_url_latest_analysis_analyses_not_found(self):

# Assert
self.assertIsNone(analysis)


def test_domain_contains_util(self):
    """``_domain_contains`` looks at the URL's domain, not at its query string."""
    # Arrange
    url_to_search = 'intezer.com'
    cases = [
        # The search term only appears in the query string here.
        ('http://google.com/scans?email=orenk@intezer.com', False),
        ('http://intezer.com/scans?email=someone@example.com', True),
        ('https://www.intezer.com/scans?email=someone@example.com', True),
        ('https://www.analyze.intezer.com/scans?email=someone@example.com', True),
    ]

    # Act + Assert
    for candidate_url, should_match in cases:
        found = _domain_contains(candidate_url, url_to_search)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertFalse(found)