Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,28 @@ results = cypher_query(model, '''

CLI: `python -m sgraph.cypher model.xml.zip [query]` — supports an interactive REPL and 11 output formats (table, csv, tsv, json, jsonl, xml, deps, dot, plantuml, graphml, cytoscape). See `docs/cypher.md` for full documentation.

### Comparing Models
```python
from sgraph.compare.modelcompare import ModelCompare
from sgraph.compare.compareutils import SLIDING_WINDOW_ATTRS

mc = ModelCompare()

# Basic comparison from files
compare_model = mc.compare('model_a.xml', 'model_b.xml')

# Exclude noisy time-windowed metrics (author counts, commit counts, etc.)
compare_model = mc.compare('model_a.xml', 'model_b.xml', exclude_attrs=SLIDING_WINDOW_ATTRS)

# Compare in-memory models
compare_model = mc.compareModels(model1, model2, exclude_attrs={'commit_count_30', 'author_list_7'})

# Inspect results
mc.printCompareInfos(compare_model)
```

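The `exclude_attrs` parameter accepts a set of attribute names to ignore during comparison. Use `SLIDING_WINDOW_ATTRS` as a preset to suppress time-windowed metric noise: the `_1`, `_7`, `_30`, `_90`, `_180` and `_365` variants of the author, commit, bug, feature and tech-debt metrics, plus `days_since_modified`, `last_modified` and `latest_commits`. The exclusion works by temporarily extending the module-level `ignoredAttrs` set for the duration of the call, so concurrent comparisons that use different `exclude_attrs` values are not thread-safe.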

## File Locations

- Source code: `src/sgraph/`
Expand Down
11 changes: 5 additions & 6 deletions src/sgraph/compare/attributecomparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ def isListAttribute(attrName):
keys1 = set(attributes1.keys())
keys2 = set(attributes2.keys())

intersection = set(keys1)
intersection.intersection(keys2)
intersection = keys1 & keys2
intersection -= ignoredAttrs

# TODO: Similar logic needed as implemented in desktop
Expand Down Expand Up @@ -78,15 +77,15 @@ def isListAttribute(attrName):
attributes2[attrName])
attrs.add(attrName)

keys1 = filter(lambda x: x not in intersection, keys1)
for attribute_in_a in keys1:
keys1_only = keys1 - intersection - ignoredAttrs
for attribute_in_a in keys1_only:
val = attributes1[attribute_in_a]
if val != '':
outmap[c + '_' + attribute_in_a] = '{};--'.format(val)
attrs.add(attribute_in_a)

keys2 = filter(lambda x: x not in intersection, keys2)
for attribute_in_b in keys2:
keys2_only = keys2 - intersection - ignoredAttrs
for attribute_in_b in keys2_only:
val = attributes2[attribute_in_b]
if val != '':
outmap[c + '_' + attribute_in_b] = '--;{}'.format(val)
Expand Down
27 changes: 27 additions & 0 deletions src/sgraph/compare/compareutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,33 @@

ignoredAttrs = {'days_since_modified'}

# Preset for the exclude_attrs parameter of ModelCompare.compare(): attribute
# names that are always noise when comparing time-windowed models.
# Every windowed metric exists once per window suffix, so the names are
# generated from (prefix, suffix) pairs instead of being spelled out.
_WINDOWED_METRIC_PREFIXES = (
    'author_list', 'author_count',
    'commit_count',
    'bug_fix_ratio', 'bug_fix_commit_count',
    'bug_count', 'bug_list',
    'feature_count', 'feature_list',
    'tech_debt',
)
# Numeric suffixes of the sliding windows (presumably day counts -- TODO confirm).
_WINDOW_SUFFIXES = (1, 7, 30, 90, 180, 365)

SLIDING_WINDOW_ATTRS = {
    f'{prefix}_{suffix}'
    for prefix in _WINDOWED_METRIC_PREFIXES
    for suffix in _WINDOW_SUFFIXES
} | {
    # Non-windowed attributes that are part of the same preset.
    'days_since_modified',
    'last_modified',
    'latest_commits',
}


def tag_change_count(compareElement: SElement, changecount: int):
if changecount > 0:
Expand Down
68 changes: 47 additions & 21 deletions src/sgraph/compare/modelcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sgraph import SElement, SElementAssociation, SGraph
from sgraph.compare.attributecomparison import compare_attrs
from sgraph.compare.comparegraphattrs import CompareGraphAttrs
from sgraph.compare.compareutils import tag_change_count, debunk_uniqueness
from sgraph.compare.compareutils import tag_change_count, debunk_uniqueness, ignoredAttrs
from sgraph.compare.renamedetector import RenameDetector


Expand All @@ -14,28 +14,54 @@ class ModelCompare:
def __init__(self):
    """Create a comparer; the constructor sets up no per-instance state."""
    pass

def compare(self, path1: str, path2: str):
def compare(self, path1: str, path2: str, exclude_attrs: set[str] | None = None):
"""Compare two models loaded from file paths.

Args:
exclude_attrs: Attribute names to ignore during comparison (e.g. SLIDING_WINDOW_ATTRS).
"""
model1 = SGraph.parse_xml_or_zipped_xml(path1)
model2 = SGraph.parse_xml_or_zipped_xml(path2)
return self.compareModels(model1, model2)

def compareModels(self, model1: SGraph, model2: SGraph, rename_detection: bool = False):
rootNode = SElement(None, '')
compareModel = SGraph(rootNode)
createdDeps: list[SElementAssociation] = []
removedDeps: list[SElementAssociation] = []

# If there is already a attr_temporary.csv that has been "spoiling" the compare,
# by introducing some removed elements to the B model that should not be there..
model2.rootNode.removeDescendantsIf(
lambda x: 'compare' in x.attrs and x.attrs['compare'] == 'removed')
self.compareWith(model1, model2, compareModel, True, createdDeps, removedDeps,
rename_detection)
for r in createdDeps:
r.addAttribute("compare", "added")
for r in removedDeps:
r.addAttribute("compare", "removed")
return compareModel
return self.compareModels(model1, model2, exclude_attrs=exclude_attrs)

def compareModels(self, model1: SGraph, model2: SGraph, rename_detection: bool = False,
                  exclude_attrs: set[str] | None = None):
    """Compare two in-memory models and produce a compare model annotating differences.

    Args:
        model1: First model of the comparison.
        model2: Second model of the comparison (the "B" model). It is modified
            in place: descendants whose ``compare`` attribute equals
            ``'removed'`` are stripped before the comparison starts.
        rename_detection: Forwarded unchanged to :meth:`compareWith`.
        exclude_attrs: Additional attribute names to ignore during comparison.
            Use ``SLIDING_WINDOW_ATTRS`` from ``compareutils`` to suppress
            time-windowed metric noise. Any iterable of names is accepted
            (set, list, tuple, ...); a single string is treated as one
            attribute name. ``None`` or an empty collection excludes nothing
            extra.

    Returns:
        A new ``SGraph`` built on a fresh, unnamed root element and populated
        by :meth:`compareWith`. Associations reported by ``compareWith`` as
        created or removed carry a ``compare`` attribute of ``'added'`` or
        ``'removed'`` respectively.

    Note:
        exclude_attrs temporarily extends the global ``ignoredAttrs`` set for the
        duration of the comparison and restores it afterwards. Not thread-safe if
        called concurrently with different exclude_attrs values.
    """
    # Normalise to a real set first: the set difference below would raise
    # TypeError for a list or tuple, and set('name') would split a bare
    # string into single characters.
    if exclude_attrs is None:
        requested = set()
    elif isinstance(exclude_attrs, str):
        requested = {exclude_attrs}
    else:
        requested = set(exclude_attrs)

    # Remember only the names this call actually adds, so that the cleanup
    # never removes entries that were already ignored before the call.
    added_to_ignored = requested - ignoredAttrs
    ignoredAttrs.update(added_to_ignored)

    try:
        rootNode = SElement(None, '')
        compareModel = SGraph(rootNode)
        createdDeps: list[SElementAssociation] = []
        removedDeps: list[SElementAssociation] = []

        # An already existing attr_temporary.csv may have been "spoiling" the
        # compare by introducing removed elements into the B model that
        # should not be there, so drop them first.
        model2.rootNode.removeDescendantsIf(
            lambda x: 'compare' in x.attrs and x.attrs['compare'] == 'removed')
        self.compareWith(model1, model2, compareModel, True, createdDeps, removedDeps,
                         rename_detection)
        for r in createdDeps:
            r.addAttribute("compare", "added")
        for r in removedDeps:
            r.addAttribute("compare", "removed")
        return compareModel
    finally:
        # Restore the global set even when the comparison raises.
        ignoredAttrs.difference_update(added_to_ignored)

# self == SGraph
def compareWith(
Expand Down
51 changes: 38 additions & 13 deletions src/sgraph/selement.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,25 @@ class SElement:
_incoming_index: dict[tuple[int, str], "SElementAssociation"]

def __init__(self, parent: Optional['SElement'], name: str):
"""
Creates an element and attach it under the given parent.
Only case when the parent may be None, is when creating the root element for the graph,
or in case of detached elements.
"""Create an element and **immediately** attach it under *parent*.

The new element is added to ``parent.children`` and
``parent.childrenDict`` during construction. There is no need to
call :meth:`addChild` afterwards — doing so would trigger a merge
or raise :class:`SElementMergedException`.

Pass ``parent=None`` only when creating the graph root or a
temporarily detached element.

Args:
parent: Parent element (``None`` for root / detached).
name: Element name. ``'/'`` characters are normalised to
``'__slash__'``.

:param parent: the parent element (optional), in the end, every element needs parent except the root.
:param name: name of the element, '/' is normalized to '__slash__'
Raises:
Exception: If parent equals self (self loop).
SElementMergedException: If an element with the same name already exists
under the parent (in non-DEBUG mode).
Exception: If *parent* equals *self* (self-loop).
SElementMergedException: If an element with the same *name*
already exists under *parent*.
"""
if name == '':
# sys.stderr.write('Creating with empty name\n')
Expand Down Expand Up @@ -85,10 +93,27 @@ def __str__(self):
return f'{self.name} ({self.getType()}) {children_info} {outbound_info} {inbound_info}'

def addChild(self, child: "SElement") -> Optional["SElement"]:
"""
Add child, but if there is an overlapping element, merge instead and return merged element.
:param child: the child to be added.
:return: None or the element where the child has been merged with (differs from child)
"""Add *child* to this element, merging on name collision.

If a child with the same name already exists, the new child is
**merged** into the existing one and the existing element is
returned.

.. note::
``SElement(parent, name)`` already attaches the new element to
*parent* during construction. Calling ``parent.addChild(elem)``
**again** will trigger the merge/duplicate path — this is almost
never what you want for freshly constructed elements.

Args:
child: Element to add.

Returns:
``None`` if the child was added as-is, or the **existing**
element that *child* was merged into.

Raises:
Exception: If *child* is *self* (self-loop).
"""
if child == self:
sys.stderr.write('Error with data model loop\n')
Expand Down
44 changes: 44 additions & 0 deletions src/sgraph/selementassociation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,28 @@


class SElementAssociation:
"""Represents a directed dependency between two SElements.

Attributes:
fromElement: The source element of the dependency.
toElement: The target element of the dependency.
deptype: The dependency type string (e.g. ``'call'``, ``'import'``).
Named ``deptype`` (not ``type``) to avoid shadowing the Python builtin.
Use :meth:`getType` as an alias.
attrs: Arbitrary key-value metadata attached to this association.

Two-phase construction:
The constructor stores the endpoints but does **not** register the
association on the elements' ``outgoing`` / ``incoming`` lists.
Call :meth:`initElems` afterwards to complete registration::

ea = SElementAssociation(src, tgt, 'import')
ea.initElems() # now visible in src.outgoing & tgt.incoming

Alternatively, use :meth:`create_unique_element_association` which
handles both steps and deduplication in one call.
"""

__slots__ = 'deptype', 'fromElement', 'toElement', 'attrs'

fromElement: SElement
Expand Down Expand Up @@ -57,6 +79,19 @@ def __init__(
deptype: str,
depattrs: dict[str, str | int | list[str]] | None = None,
):
"""Create an association object **without** registering it on the elements.

After construction the association is an inert object — it does not
appear in ``fr.outgoing`` or ``to.incoming``. Call :meth:`initElems`
to complete registration, or use
:meth:`create_unique_element_association` for a one-step alternative.

Args:
fr: Source / from-element.
to: Target / to-element.
deptype: Dependency kind (e.g. ``'call'``, ``'import'``).
depattrs: Optional metadata dict. ``None`` becomes ``{}``.
"""
self.deptype = deptype

# Good to have this decommented when testing new analyzers:
Expand Down Expand Up @@ -103,6 +138,15 @@ def getAttributes(self):
return self.attrs

def initElems(self):
"""Register this association on both endpoint elements.

Appends ``self`` to :attr:`fromElement.outgoing` and
:attr:`toElement.incoming`, and updates the incoming index used
for O(1) duplicate detection.

Must be called exactly once after :meth:`__init__`.
:meth:`create_unique_element_association` calls this automatically.
"""
self.fromElement.outgoing.append(self)
self.toElement.incoming.append(self)
# Maintain index for O(1) duplicate lookup
Expand Down
Loading
Loading