diff --git a/arcflow/__init__.py b/arcflow/__init__.py
index f80bba7..7856a3d 100644
--- a/arcflow/__init__.py
+++ b/arcflow/__init__.py
@@ -1 +1,15 @@
-from .main import ArcFlow
\ No newline at end of file
+"""
+ArcFlow package for syncing ArchivesSpace to ArcLight.
+
+To use ArcFlow, import directly from the main module:
+ from arcflow.main import ArcFlow
+
+Services can be imported independently:
+ from arcflow.services.xml_transform_service import XmlTransformService
+ from arcflow.services.agent_service import AgentService
+
+The top-level import is disabled to avoid eager loading of dependencies.
+"""
+
+# Avoid eager imports to allow services to be imported independently
+# from .main import ArcFlow
\ No newline at end of file
diff --git a/arcflow/main.py b/arcflow/main.py
index 430539a..acac689 100644
--- a/arcflow/main.py
+++ b/arcflow/main.py
@@ -19,6 +19,8 @@
from asnake.client import ASnakeClient
from multiprocessing.pool import ThreadPool as Pool
from utils.stage_classifications import extract_labels
+from .services.xml_transform_service import XmlTransformService
+from .services.agent_service import AgentService
import glob
base_dir = os.path.abspath((__file__) + "/../../")
@@ -115,6 +117,10 @@ def __init__(self, arclight_dir, aspace_dir, solr_url, aspace_solr_url, ead_extr
self.log.error(f'Error authorizing ASnakeClient: {e}')
exit(0)
+ # Initialize services
+ self.xml_transform = XmlTransformService(client=self.client, log=self.log)
+ self.agent_service = AgentService(client=self.client, log=self.log)
+
def is_running(self):
"""
@@ -262,50 +268,24 @@ def task_resource(self, repo, resource_id, xml_dir, pdf_dir, indent_size=0):
# (record group/subgroup labels and biographical/historical notes)
if xml.content:
xml_content = xml.content.decode('utf-8')
- insert_pos = xml_content.find('<did>')
-
- if insert_pos != -1:
- # Find the position after the closing </did> tag
- did_end_pos = xml_content.find('</did>', insert_pos)
-
- if did_end_pos != -1:
- # Move to after the </did> tag
- did_end_pos += len('</did>')
- extra_xml = ''
-
- # Add record group and subgroup labels
- rg_label, sg_label = extract_labels(resource)[1:3]
- if rg_label:
- extra_xml += f'\n<recordgroup>{xml_escape(rg_label)}</recordgroup>'
- if sg_label:
- extra_xml += f'\n<subgroup>{xml_escape(sg_label)}</subgroup>'
-
- # Handle biographical/historical notes from creator agents
- bioghist_content = self.get_creator_bioghist(resource, indent_size=indent_size)
- if bioghist_content:
- # Check if there's already a bioghist element in the EAD
- # Search for existing bioghist after </did> but before </archdesc>
- archdesc_end = xml_content.find('</archdesc>', did_end_pos)
- search_section = xml_content[did_end_pos:archdesc_end] if archdesc_end != -1 else xml_content[did_end_pos:]
-
- # Look for closing </bioghist> tag
- existing_bioghist_end = search_section.rfind('</bioghist>')
-
- if existing_bioghist_end != -1:
- # Found existing bioghist - insert agent elements INSIDE it (before closing tag)
- insert_pos = did_end_pos + existing_bioghist_end
- xml_content = (xml_content[:insert_pos] +
- f'\n{bioghist_content}\n' +
- xml_content[insert_pos:])
- else:
- # No existing bioghist - wrap agent elements in parent <bioghist> container
- wrapped_content = f'<bioghist>\n{bioghist_content}\n</bioghist>'
- extra_xml += f'\n{wrapped_content}'
-
- if extra_xml:
- xml_content = (xml_content[:did_end_pos] +
- extra_xml +
- xml_content[did_end_pos:])
+
+ # Add arcuit:creator_id attributes (in a custom namespace) to origination name elements
+ # (links creator names in EAD to their corresponding creator records, e.g., in Solr)
+ xml_content = self.xml_transform.add_creator_ids_to_ead(xml_content, resource, indent_size=indent_size)
+
+ # Get record group and subgroup labels
+ rg_label, sg_label = extract_labels(resource)[1:3]
+
+ # Get biographical/historical notes from creator agents
+ bioghist_content = self.get_creator_bioghist(resource, indent_size=indent_size)
+
+ # Inject all collection metadata using XmlTransformService
+ xml_content = self.xml_transform.inject_collection_metadata(
+ xml_content,
+ record_group=rg_label,
+ subgroup=sg_label,
+ bioghist_content=bioghist_content
+ )
xml_content = xml_content.encode('utf-8')
else:
@@ -634,7 +614,6 @@ def get_creator_bioghist(self, resource, indent_size=0):
Returns nested bioghist elements for each creator, or None if no creator agents have notes.
Each bioghist element includes the creator name in a head element and an id attribute.
"""
- indent = ' ' * indent_size
bioghist_elements = []
if 'linked_agents' not in resource:
@@ -646,58 +625,16 @@ def get_creator_bioghist(self, resource, indent_size=0):
if linked_agent.get('role') == 'creator':
agent_ref = linked_agent.get('ref')
if agent_ref:
- try:
- agent = self.client.get(agent_ref).json()
-
- # Get agent name for head element
- agent_name = agent.get('title') or agent.get('display_name', {}).get('sort_name', 'Unknown')
-
- # Check for notes in the agent record
- if 'notes' in agent:
- for note in agent['notes']:
- # Look for biographical/historical notes
- if note.get('jsonmodel_type') == 'note_bioghist':
- # Get persistent_id for the id attribute
- persistent_id = note.get('persistent_id', '')
- if not persistent_id:
- self.log.error(f'{indent}**ASSUMPTION VIOLATION**: Expected persistent_id in note_bioghist for agent {agent_ref}')
- # Skip creating id attribute if persistent_id is missing
- persistent_id = None
-
- # Extract note content from subnotes
- paragraphs = []
- if 'subnotes' in note:
- for subnote in note['subnotes']:
- if 'content' in subnote:
- # Split content on single newlines to create paragraphs
- content = subnote['content']
- # Handle content as either string or list with explicit type checking
- if isinstance(content, str):
- # Split on newline and filter out empty strings
- lines = [line.strip() for line in content.split('\n') if line.strip()]
- elif isinstance(content, list):
- # Content is already a list - use as is
- lines = [str(item).strip() for item in content if str(item).strip()]
- else:
- # Log unexpected content type prominently
- self.log.error(f'{indent}**ASSUMPTION VIOLATION**: Expected string or list for subnote content in agent {agent_ref}, got {type(content).__name__}')
- continue
- # Wrap each line in <p> tags
- for line in lines:
- paragraphs.append(f'<p>{line}</p>')
-
- # Create nested bioghist element if we have paragraphs
- if paragraphs:
- paragraphs_xml = '\n'.join(paragraphs)
- heading = f'<head>Historical Note from {xml_escape(agent_name)} Creator Record</head>'
- # Only include id attribute if persistent_id is available
- if persistent_id:
- bioghist_el = f'<bioghist id="aspace_{persistent_id}">{heading}\n{paragraphs_xml}\n</bioghist>'
- else:
- bioghist_el = f'<bioghist>{heading}\n{paragraphs_xml}\n</bioghist>'
- bioghist_elements.append(bioghist_el)
- except Exception as e:
- self.log.error(f'{indent}Error fetching biographical information for agent {agent_ref}: {e}')
+ bioghist_data = self.agent_service.get_agent_bioghist_data(
+ agent_ref, indent_size=indent_size
+ )
+ if bioghist_data:
+ bioghist_xml = self.xml_transform.build_bioghist_element(
+ bioghist_data['agent_name'],
+ bioghist_data['persistent_id'],
+ bioghist_data['paragraphs']
+ )
+ bioghist_elements.append(bioghist_xml)
if bioghist_elements:
# Return the agent bioghist elements (unwrapped)
@@ -879,14 +816,14 @@ def task_agent(self, agent_uri, agents_dir, repo_id=1, indent_size=0):
eac_cpf_xml = response.text
- # Parse the EAC-CPF XML to validate and inspect its structure
- try:
- root = ET.fromstring(eac_cpf_xml)
- self.log.debug(f'{indent}Parsed EAC-CPF XML root element: {root.tag}')
- except ET.ParseError as e:
- self.log.error(f'{indent}Failed to parse EAC-CPF XML for {agent_uri}: {e}')
+ # Validate EAC-CPF XML structure
+ if not self.xml_transform.validate_eac_cpf_xml(eac_cpf_xml, agent_uri, indent_size=indent_size):
+ self.log.error(f'{indent}Invalid EAC-CPF XML for {agent_uri}, skipping')
return None
+ # Add collection ead_ids to resourceRelation creatorOf elements
+ eac_cpf_xml = self.xml_transform.add_collection_links_to_eac_cpf(eac_cpf_xml, indent_size=indent_size)
+
# Generate creator ID
creator_id = f'creator_{agent_type}_{agent_id}'
diff --git a/arcflow/services/__init__.py b/arcflow/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/arcflow/services/agent_service.py b/arcflow/services/agent_service.py
new file mode 100644
index 0000000..35e6a16
--- /dev/null
+++ b/arcflow/services/agent_service.py
@@ -0,0 +1,115 @@
+"""
+Service for fetching and processing agent data from ArchivesSpace.
+
+Handles agent-related operations including:
+- Fetching agent biographical/historical notes
+- Processing note content into structured data
+"""
+
+import logging
+from typing import Optional, List, Dict
+
+
+class AgentService:
+ """Service for agent data fetching and processing."""
+
+ def __init__(self, client, log=None):
+ """
+ Initialize the agent service.
+
+ Args:
+ client: ASnake client for fetching agent data
+ log: Logger instance (optional, creates default if not provided)
+ """
+ self.client = client
+ self.log = log or logging.getLogger(__name__)
+
+ def get_agent_bioghist_data(self, agent_uri: str, indent_size: int = 0) -> Optional[Dict]:
+ """
+ Fetch bioghist DATA for an agent.
+
+ Returns structured data (not XML) so it can be used in different contexts:
+ - Build EAD XML for collections
+ - Build EAC-CPF XML for creator records
+ - Display in a web UI
+ - Export as JSON
+
+ Args:
+ agent_uri: Agent URI from ArchivesSpace (e.g., '/agents/corporate_entities/123')
+ indent_size: Indentation size for logging
+
+ Returns:
+ dict with keys: 'agent_name', 'persistent_id', 'paragraphs'
+ or None if no bioghist found or on error
+ """
+ indent = ' ' * indent_size
+
+ try:
+ agent = self.client.get(agent_uri).json()
+ agent_name = agent.get('title') or agent.get('display_name', {}).get('sort_name', 'Unknown')
+
+ for note in agent.get('notes', []):
+ if note.get('jsonmodel_type') == 'note_bioghist':
+ persistent_id = note.get('persistent_id')
+ paragraphs = self._extract_paragraphs(note, agent_uri, indent_size)
+
+ if paragraphs:
+ return {
+ 'agent_name': agent_name,
+ 'persistent_id': persistent_id,
+ 'paragraphs': paragraphs
+ }
+
+ return None # No bioghist
+
+ except Exception as e:
+ self.log.error(f'{indent}Error fetching agent {agent_uri}: {e}')
+ return None
+
+ def _extract_paragraphs(self, note: dict, agent_uri: str, indent_size: int = 0) -> List[str]:
+ """
+ Extract paragraph content from a bioghist note.
+
+ Args:
+ note: Note dictionary from ArchivesSpace
+ agent_uri: Agent URI for logging purposes
+ indent_size: Indentation size for logging
+
+ Returns:
+ List of plain text paragraph strings (not wrapped in <p> tags)
+ """
+ indent = ' ' * indent_size
+ paragraphs = []
+
+ if 'subnotes' in note:
+ for subnote in note['subnotes']:
+ if 'content' in subnote:
+ content = subnote['content']
+
+ # Handle content as either string or list with explicit type checking
+ if isinstance(content, str):
+ # Split on newline and filter out empty strings
+ lines = [line.strip() for line in content.split('\n') if line.strip()]
+ elif isinstance(content, list):
+ # Content is already a list - use as is
+ lines = [str(item).strip() for item in content if str(item).strip()]
+ else:
+ # Log unexpected content type prominently
+ self.log.error(
+ f'{indent}**ASSUMPTION VIOLATION**: Expected string or list for subnote content '
+ f'in agent {agent_uri}, got {type(content).__name__}'
+ )
+ continue
+
+ # Add plain text lines (will be wrapped in <p> tags by build_bioghist_element)
+ for line in lines:
+ paragraphs.append(line)
+
+ # Log if persistent_id is missing
+ if not note.get('persistent_id'):
+ self.log.error(
+ f'{indent}**ASSUMPTION VIOLATION**: Expected persistent_id in note_bioghist '
+ f'for agent {agent_uri}'
+ )
+
+ return paragraphs
diff --git a/arcflow/services/xml_transform_service.py b/arcflow/services/xml_transform_service.py
new file mode 100644
index 0000000..ea2e3fb
--- /dev/null
+++ b/arcflow/services/xml_transform_service.py
@@ -0,0 +1,445 @@
+"""
+Service for transforming and manipulating XML content.
+
+Handles EAD and EAC-CPF XML transformations including:
+- Adding creator IDs to origination elements
+- Injecting collection metadata (record groups, subgroups, bioghist)
+- Adding collection links to EAC-CPF resourceRelation elements
+- Building bioghist XML elements from structured data
+"""
+
+import re
+from typing import Optional, List
+from lxml import etree
+import logging
+
+
+class XmlTransformService:
+ """Service for XML transformations and manipulations."""
+
+ def __init__(self, client=None, log=None):
+ """
+ Initialize the XML transform service.
+
+ Args:
+ client: ASnake client for fetching resources (optional, needed for some operations)
+ log: Logger instance (optional, creates default if not provided)
+ """
+ self.client = client
+ self.log = log or logging.getLogger(__name__)
+
+ def add_creator_ids_to_ead(self, ead: str, resource: dict, indent_size: int = 0) -> str:
+ """
+ Add arcuit:creator_id attributes to name elements inside <origination> elements in EAD XML.
+
+ Uses a custom namespace (xmlns:arcuit="https://arcuit.library.illinois.edu/ead-extensions") to avoid
+ collisions with standard EAD attributes like authfilenumber.
+
+ Maps linked_agents with role='creator' to origination elements by index order.
+ The arcuit:creator_id value is a creator ID in the format creator_{type}_{id}.
+
+ Args:
+ ead: EAD XML as a string
+ resource: ArchivesSpace resource record with resolved linked_agents
+ indent_size: Indentation size for logging
+
+ Returns:
+ str: Modified EAD XML string with arcuit namespace and creator_id attributes
+ """
+ indent = ' ' * indent_size
+
+ # Extract creator IDs from linked_agents in order
+ creator_ids = []
+ for linked_agent in resource.get('linked_agents', []):
+ if linked_agent.get('role') == 'creator':
+ agent_ref = linked_agent.get('ref', '')
+ match = re.match(r'.*/agents/(corporate_entities|people|families)/(\d+)$', agent_ref)
+ if match:
+ creator_ids.append(f'creator_{match.group(1)}_{match.group(2)}')
+ else:
+ self.log.warning(f'{indent}Could not parse creator ID from agent ref: {agent_ref}')
+
+ if not creator_ids:
+ return ead
+
+ try:
+ # Define the Arcuit namespace
+ arcuit_ns = "https://arcuit.library.illinois.edu/ead-extensions"
+
+ # Parse the XML with lxml
+ parser = etree.XMLParser(remove_blank_text=False)
+ root = etree.fromstring(ead.encode('utf-8'), parser)
+ namespace = ''
+ if root.tag.startswith('{'):
+ namespace = root.tag.split('}')[0] + '}'
+
+ # Add arcuit namespace declaration to root element if not present
+ nsmap = root.nsmap.copy() if root.nsmap else {}
+ if 'arcuit' not in nsmap:
+ nsmap['arcuit'] = arcuit_ns
+ # Create a new root element with updated namespace map
+ new_root = etree.Element(root.tag, nsmap=nsmap, attrib=root.attrib)
+ new_root.text = root.text
+ new_root.tail = root.tail
+ for child in root:
+ new_root.append(child)
+ root = new_root
+
+ # Find all origination elements with label="Creator"
+ creator_idx = 0
+ for origination in root.iter(f'{namespace}origination'):
+ if origination.get('label') == 'Creator' and creator_idx < len(creator_ids):
+ creator_id = creator_ids[creator_idx]
+
+ # Find the first name element (corpname, persname, or famname)
+ name_elem = None
+ for tag in ['corpname', 'persname', 'famname']:
+ name_elem = origination.find(f'{namespace}{tag}')
+ if name_elem is not None:
+ break
+
+ if name_elem is not None:
+ # Add the arcuit:creator_id attribute (always, never skip)
+ name_elem.set(f'{{{arcuit_ns}}}creator_id', creator_id)
+ creator_idx += 1
+ else:
+ # No eligible name element found
+ self.log.debug(
+ f'{indent}No eligible name element in <origination> for creator ID {creator_id}'
+ )
+
+ # Convert back to string with lxml, preserving XML declaration and namespaces
+ # Serialize to bytes first (which allows xml_declaration), then decode
+ result_bytes = etree.tostring(
+ root,
+ encoding='UTF-8',
+ method='xml',
+ pretty_print=False,
+ xml_declaration=True
+ )
+ result = result_bytes.decode('utf-8')
+ return result
+
+ except etree.ParseError as e:
+ self.log.error(f'{indent}Failed to parse EAD XML: {e}. Returning original content.')
+ return ead
+
+ def inject_collection_metadata(
+ self,
+ ead: str,
+ record_group: Optional[str],
+ subgroup: Optional[str],
+ bioghist_content: Optional[str]
+ ) -> str:
+ """
+ Inject ArcFlow metadata into collection EAD XML after the </did> tag.
+
+ Adds:
+ - Record group and subgroup classification labels
+ - Biographical/historical notes from creator agents
+
+ Args:
+ ead: EAD XML as a string
+ record_group: Record group label (e.g., "ALA 52 — Library Periodicals")
+ subgroup: Subgroup label (e.g., "ALA 52.2 — Publications")
+ bioghist_content: XML string of bioghist elements to inject
+
+ Returns:
+ str: Modified EAD XML string
+ """
+ try:
+ # Parse the XML with lxml
+ parser = etree.XMLParser(remove_blank_text=False)
+ root = etree.fromstring(ead.encode('utf-8'), parser)
+
+ # Get the namespace, if any
+ namespace = ''
+ if root.tag.startswith('{'):
+ namespace = root.tag.split('}')[0] + '}'
+
+ archdesc = None
+ for elem in root.iter(f'{namespace}archdesc'):
+ if elem.get('level') == 'collection':
+ archdesc = elem
+ break
+
+ if archdesc is None:
+ return ead
+
+ did = archdesc.find(f'{namespace}did')
+ if did is None:
+ return ead
+
+ did_index = list(archdesc).index(did)
+ insert_index = did_index + 1
+
+ if record_group:
+ recordgroup = etree.Element(f'{namespace}recordgroup')
+ recordgroup.text = record_group
+ archdesc.insert(insert_index, recordgroup)
+ insert_index += 1
+
+ if subgroup:
+ subgroup_elem = etree.Element(f'{namespace}subgroup')
+ subgroup_elem.text = subgroup
+ archdesc.insert(insert_index, subgroup_elem)
+ insert_index += 1
+
+ if bioghist_content:
+ existing_bioghist = None
+ for elem in archdesc:
+ if elem.tag == f'{namespace}bioghist':
+ existing_bioghist = elem
+ break
+
+ try:
+ # Wrap in a temporary root to handle multiple bioghist elements
+ bioghist_wrapper = etree.fromstring(f'<wrapper>{bioghist_content}</wrapper>'.encode('utf-8'))
+ bioghist_elements = list(bioghist_wrapper)
+
+ def _qualify_namespace(elem):
+ """
+ Ensure elem and its descendants use the same namespace as the
+ source EAD document when a default namespace is present.
+ """
+ if not namespace:
+ return
+ for child in elem.iter():
+ if isinstance(child.tag, str) and not child.tag.startswith('{'):
+ child.tag = f'{namespace}{child.tag}'
+
+ if existing_bioghist is not None:
+ for bioghist_elem in bioghist_elements:
+ _qualify_namespace(bioghist_elem)
+ existing_bioghist.append(bioghist_elem)
+ else:
+ # No existing bioghist: insert each parsed bioghist element
+ # directly into archdesc to preserve creator-level wrappers
+ # and attributes (e.g., id) returned by get_creator_bioghist.
+ for bioghist_elem in bioghist_elements:
+ _qualify_namespace(bioghist_elem)
+ archdesc.insert(insert_index, bioghist_elem)
+ insert_index += 1
+
+ except etree.ParseError as e:
+ self.log.warning(f'Failed to parse bioghist content: {e}')
+
+ result_bytes = etree.tostring(
+ root,
+ encoding='UTF-8',
+ method='xml',
+ pretty_print=False,
+ xml_declaration=True
+ )
+ result = result_bytes.decode('utf-8')
+ return result
+
+ except etree.ParseError as e:
+ self.log.error(f'Failed to parse EAD XML: {e}. Returning original content.')
+ return ead
+
+ def add_collection_links_to_eac_cpf(self, eac_cpf_xml: str, indent_size: int = 0) -> str:
+ """
+ Add <descriptiveNote><p>ead_id:{ead_id}</p></descriptiveNote> to
+ <resourceRelation resourceRelationType="creatorOf">
+ elements in EAC-CPF XML.
+
+ For each creatorOf resourceRelation, fetches the linked ArchivesSpace resource
+ to obtain its ead_id. If a resource cannot be fetched (deleted, unpublished, etc.),
+ logs a warning and skips that collection link.
+
+ Args:
+ eac_cpf_xml: EAC-CPF XML as a string
+ indent_size: Indentation size for logging
+
+ Returns:
+ str: Modified EAC-CPF XML string
+
+ Raises:
+ ValueError: If client is not configured (required for fetching resources)
+ """
+ if not self.client:
+ raise ValueError("Client is required for add_collection_links_to_eac_cpf operation")
+
+ indent = ' ' * indent_size
+
+ # Save the original XML to return if no changes are made
+ original_xml = eac_cpf_xml
+
+ try:
+ # Parse the XML with lxml, handling potential namespace issues
+ parser = etree.XMLParser(remove_blank_text=False)
+ try:
+ root = etree.fromstring(eac_cpf_xml.encode('utf-8'), parser)
+ except etree.ParseError:
+ # If parsing fails, it might be due to undeclared namespaces
+ # Try to fix by adding namespace declarations
+ if 'xlink:' in eac_cpf_xml and 'xmlns:xlink' not in eac_cpf_xml:
+ # Add xlink namespace declaration to root element
+ eac_cpf_xml = eac_cpf_xml.replace('<eac-cpf', '<eac-cpf xmlns:xlink="http://www.w3.org/1999/xlink"', 1)
+ root = etree.fromstring(eac_cpf_xml.encode('utf-8'), parser)
+
+ # Detect EAC-CPF namespace
+ namespace = ''
+ if root.tag.startswith('{'):
+ namespace = root.tag.split('}')[0] + '}'
+
+ # Track if any changes were made
+ changes_made = False
+
+ # Find all resourceRelation elements with resourceRelationType="creatorOf"
+ for resource_relation in root.iter(f'{namespace}resourceRelation'):
+ if resource_relation.get('resourceRelationType') != 'creatorOf':
+ continue
+
+ # Check if descriptiveNote with ead_id pattern already exists
+ has_ead_id_note = False
+ for desc_note in resource_relation.findall(f'{namespace}descriptiveNote'):
+ for p in desc_note.findall(f'{namespace}p'):
+ if p.text and p.text.startswith('ead_id:'):
+ has_ead_id_note = True
+ break
+ if has_ead_id_note:
+ break
+
+ if has_ead_id_note:
+ # Already has our descriptiveNote, skip
+ continue
+
+ # Extract href attribute - try multiple variations
+ href = None
+ # Try with xlink namespace
+ for attr_key in resource_relation.attrib:
+ if 'href' in attr_key:
+ href = resource_relation.attrib[attr_key]
+ break
+
+ if not href:
+ continue
+
+ # Only process resource URLs (skip digital_objects, etc.)
+ # Pattern: repositories/{number}/resources/{number}
+ uri_match = re.search(r'/repositories/(\d+)/resources/(\d+)', href)
+ if not uri_match:
+ # Not a resource URL (likely digital_object or other type) - skip silently
+ continue
+
+ res_repo_id = uri_match.group(1)
+ res_resource_id = uri_match.group(2)
+
+ # Fetch resource to get ead_id; skip on any error
+ try:
+ response = self.client.get(f'/repositories/{res_repo_id}/resources/{res_resource_id}')
+ if response.status_code != 200:
+ self.log.warning(
+ f'{indent}Could not fetch resource {href}: HTTP {response.status_code}. '
+ 'Skipping collection link.')
+ continue
+
+ resource = response.json()
+ ead_id = resource.get('ead_id')
+ if not ead_id:
+ self.log.warning(
+ f'{indent}Resource /repositories/{res_repo_id}/resources/{res_resource_id} '
+ 'has no ead_id. Skipping collection link.')
+ continue
+
+ # Create descriptiveNote element with ead_id (namespace-aware)
+ descriptive_note = etree.Element(f'{namespace}descriptiveNote')
+ p = etree.SubElement(descriptive_note, f'{namespace}p')
+ p.text = f'ead_id:{ead_id}'
+
+ # Append to resourceRelation
+ resource_relation.append(descriptive_note)
+ changes_made = True
+
+ except Exception as e:
+ self.log.warning(f'{indent}Could not fetch resource for {href}: {e}. Skipping collection link.')
+ continue
+
+ # Only convert back to string if changes were made
+ if changes_made:
+ result_bytes = etree.tostring(
+ root,
+ encoding='UTF-8',
+ method='xml',
+ pretty_print=False,
+ xml_declaration=True
+ )
+ result = result_bytes.decode('utf-8')
+ return result
+ else:
+ # Return original XML (not the potentially modified version with namespace)
+ return original_xml
+
+ except etree.ParseError as e:
+ self.log.error(f'{indent}Failed to parse EAC-CPF XML: {e}. Returning original content.')
+ return original_xml
+
+ def build_bioghist_element(
+ self,
+ agent_name: str,
+ persistent_id: Optional[str],
+ paragraphs: List[str]
+ ) -> str:
+ """
+ Build bioghist XML element from structured data using lxml for proper escaping.
+
+ Args:
+ agent_name: Name of the agent for the head element
+ persistent_id: Persistent ID for the bioghist element (optional)
+ paragraphs: List of plain text paragraph strings (will be wrapped in <p> tags with proper escaping)
+
+ Returns:
+ str: Bioghist XML element as a string
+ """
+ # Create bioghist element
+ bioghist = etree.Element('bioghist')
+
+ # Add id attribute if persistent_id is available
+ if persistent_id:
+ bioghist.set('id', f'aspace_{persistent_id}')
+
+ # Create head element with escaped text
+ head = etree.SubElement(bioghist, 'head')
+ head.text = f'Historical Note from {agent_name} Creator Record'
+
+ # Create <p> elements from plain text paragraphs
+ # lxml automatically handles XML escaping
+ for paragraph_text in paragraphs:
+ p = etree.SubElement(bioghist, 'p')
+ p.text = paragraph_text
+
+ # Convert to string (no XML declaration for fragments)
+ return etree.tostring(bioghist, encoding='unicode', method='xml')
+
+ def validate_eac_cpf_xml(self, eac_cpf_xml: str, agent_uri: str, indent_size: int = 0) -> Optional['etree._Element']:
+ """
+ Parse and validate EAC-CPF XML structure.
+
+ Args:
+ eac_cpf_xml: EAC-CPF XML as a string
+ agent_uri: Agent URI for logging purposes
+ indent_size: Indentation size for logging
+
+ Returns:
+ lxml Element if valid, None if parsing fails
+ """
+ indent = ' ' * indent_size
+
+ try:
+ # Try to parse with lxml, with fallback for missing xlink namespace
+ parser = etree.XMLParser(remove_blank_text=False)
+ try:
+ root = etree.fromstring(eac_cpf_xml.encode('utf-8'), parser)
+ except etree.ParseError:
+ # If parsing fails, it might be due to undeclared namespaces
+ if 'xlink:' in eac_cpf_xml and 'xmlns:xlink' not in eac_cpf_xml:
+ # Add xlink namespace declaration to root element
+ eac_cpf_xml = eac_cpf_xml.replace('<eac-cpf', '<eac-cpf xmlns:xlink="http://www.w3.org/1999/xlink"', 1)
+ root = etree.fromstring(eac_cpf_xml.encode('utf-8'), parser)
+
+ self.log.debug(f'{indent}Parsed EAC-CPF XML root element: {root.tag}')
+ return root
+ except etree.ParseError as e:
+ self.log.error(f'{indent}Failed to parse EAC-CPF XML for {agent_uri}: {e}')
+ return None
\ No newline at end of file
diff --git a/example_traject_config_eac_cpf.rb b/example_traject_config_eac_cpf.rb
index 177da4f..7b804f3 100644
--- a/example_traject_config_eac_cpf.rb
+++ b/example_traject_config_eac_cpf.rb
@@ -203,6 +203,14 @@
end
end
+# Related Agents - Parallel array of names to match relationship ids, uris and type
+to_field 'related_agent_names_ssim' do |record, accumulator|
+ relations = record.xpath('//eac:cpfDescription/eac:relations/eac:cpfRelation/eac:relationEntry', EAC_NS)
+ relations.each do |rel|
+ accumulator << rel.text
+ end
+end
+
# Related Agents - Parallel array of relationship types to match relationship ids and uris
to_field 'related_agent_relationship_types_ssim' do |record, accumulator|
relations = record.xpath('//eac:cpfDescription/eac:relations/eac:cpfRelation', EAC_NS)
@@ -224,6 +232,66 @@
end
end
+# Collections this creator is responsible for - EAD IDs injected by arcflow
+# into <resourceRelation resourceRelationType="creatorOf"> elements as:
+#   <descriptiveNote><p>ead_id:{ead_id}</p></descriptiveNote>
+# Indexed as an array of EAD IDs (e.g., ["ALA.9.5.16"]) for bidirectional
+# creator↔collection linking in Solr.
+to_field 'creator_of_collection__collection_ids_ssim' do |record, accumulator|
+ relations = record.xpath(
+ '//eac:cpfDescription/eac:relations/eac:resourceRelation[@resourceRelationType="creatorOf"]',
+ EAC_NS
+ )
+ relations.each do |rel|
+ note = rel.xpath('eac:descriptiveNote/eac:p', EAC_NS).first
+ if note && note.text =~ /\Aead_id:(.+)\z/
+ accumulator << $1.strip
+ end
+ end
+end
+
+to_field 'creator_of_collection__collection_name_ssim' do |record, accumulator|
+ relations = record.xpath(
+ '//eac:cpfDescription/eac:relations/eac:resourceRelation[@resourceRelationType="creatorOf"]',
+ EAC_NS
+ )
+ relations.each do |rel|
+ note = rel.xpath('eac:descriptiveNote/eac:p', EAC_NS).first
+ if note && note.text =~ /\Aead_id:(.+)\z/
+ name = rel.xpath('eac:relationEntry', EAC_NS)
+ accumulator << name.text
+ end
+ end
+end
+
+
+to_field 'creator_of_digital_object__do_ids_ssim' do |record, accumulator|
+ relations = record.xpath(
+ '//eac:cpfDescription/eac:relations/eac:resourceRelation[@resourceRelationType="creatorOf"]',
+ EAC_NS
+ )
+ relations.each do |rel|
+ href = rel['href'] || rel['xlink:href']
+ if href.include? "digital_object"
+ accumulator << href
+ end
+ end
+end
+
+to_field 'subject_of_digital_object__do_ids_ssim' do |record, accumulator|
+ relations = record.xpath(
+ '//eac:cpfDescription/eac:relations/eac:resourceRelation[@resourceRelationType="subjectOf"]',
+ EAC_NS
+ )
+ relations.each do |rel|
+ href = rel['href'] || rel['xlink:href']
+ if href.include? "digital_object"
+ accumulator << href
+ end
+ end
+end
+
+
# Agent source URI (from original ArchivesSpace)
to_field 'agent_uri_ssi' do |record, accumulator|
# Try to extract from control section or otherRecordId
@@ -238,11 +306,6 @@
accumulator << Time.now.utc.iso8601
end
-# # Document type marker
-# to_field 'document_type' do |record, accumulator|
-# accumulator << 'creator'
-# end
-
# Log successful indexing
each_record do |record, context|
record_id = record.xpath('//eac:control/eac:recordId', EAC_NS).first
diff --git a/example_traject_config_ead_extra.rb b/example_traject_config_ead_extra.rb
new file mode 100644
index 0000000..8ad70a7
--- /dev/null
+++ b/example_traject_config_ead_extra.rb
@@ -0,0 +1,66 @@
+# Example Traject extra config for EAD collection indexing.
+# You can copy this file into Arclight (or a theme you have modifying Arclight,
+# e.g., Arcuit):
+# {arclight_dir}/lib/arcuit/traject/ead_extra_config.rb
+#
+# Any additional Traject commands you add to this file will be added to collection
+# records in Arclight.
+#
+# This file shows the fields that arcflow injects into EAD XML to support:
+# 1. Record group and sub-group categories
+# 2. Solr ID for the creator records also created by arcflow
+#
+# GROUP + SUB-GROUP
+# Arcflow adds <recordgroup> and <subgroup> elements directly after </did>:
+#   <recordgroup>ALA 52 — Library Periodicals Round Table</recordgroup>
+#   <subgroup>ALA 52.2 — Publications</subgroup>
+#
+# CREATOR RECORDS
+# Arcflow adds arcuit:creator_id attributes to origination name elements
+# using a custom namespace to avoid collisions with existing authfilenumber values:
+#   <ead xmlns:arcuit="https://arcuit.library.illinois.edu/ead-extensions">
+#     <archdesc level="collection"><did>
+#       <origination label="Creator">
+#         <corpname arcuit:creator_id="creator_corporate_entities_123">
+#           ALA Allied Professional Association
+#         </corpname>
+#       </origination>
+#     </did></archdesc>
+#   </ead>
+
+# Creator ArcLight IDs - extracted from arcuit:creator_id attributes on origination
+# name elements (<corpname>, <persname>, <famname>) injected by arcflow.
+# Uses custom namespace xmlns:arcuit="https://arcuit.library.illinois.edu/ead-extensions"
+# Indexed as an array of creator IDs (e.g., ["creator_corporate_entities_123"])
+# for bidirectional creator↔collection linking in Solr.
+to_field 'creator_arclight_ids_ssim' do |record, accumulator|
+ # Define namespace
+ arcuit_ns = {'arcuit' => 'https://arcuit.library.illinois.edu/ead-extensions',
+ 'ead' => 'urn:isbn:1-931666-22-9'}
+
+ # Extract arcuit:creator_id from origination name elements
+ record.xpath('//ead:archdesc/ead:did/ead:origination/ead:corpname[@arcuit:creator_id] |
+ //ead:archdesc/ead:did/ead:origination/ead:persname[@arcuit:creator_id] |
+ //ead:archdesc/ead:did/ead:origination/ead:famname[@arcuit:creator_id]',
+ arcuit_ns).each do |node|
+ accumulator << node['arcuit:creator_id']
+ end
+
+ # Also check without EAD namespace (some ASpace EAD exports omit it)
+ if accumulator.empty?
+ record.xpath('//archdesc/did/origination/corpname[@arcuit:creator_id] |
+ //archdesc/did/origination/persname[@arcuit:creator_id] |
+ //archdesc/did/origination/famname[@arcuit:creator_id]',
+ arcuit_ns).each do |node|
+ accumulator << node['arcuit:creator_id']
+ end
+ end
+end
+
+# Record group and sub-group - extracted from recordgroup and subgroup elements
+# injected by Arcflow into EAD documents created by ArchivesSpace
+to_field 'record_group_ssim', extract_xpath('/ead/archdesc/recordgroup')
+to_field 'subgroup_ssim', extract_xpath('/ead/archdesc/subgroup')
diff --git a/requirements.txt b/requirements.txt
index 6efbe65..84174a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
ArchivesSnake
-pyyaml
\ No newline at end of file
+pyyaml
+lxml
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_agent_service.py b/tests/test_agent_service.py
new file mode 100644
index 0000000..fde3792
--- /dev/null
+++ b/tests/test_agent_service.py
@@ -0,0 +1,257 @@
+"""
+Tests for AgentService.
+"""
+
+import unittest
+from unittest.mock import Mock
+from arcflow.services.agent_service import AgentService
+
+
+class TestAgentService(unittest.TestCase):
+ """Test cases for AgentService."""
+
+ def setUp(self):
+ """Set up test fixtures."""
+ self.mock_client = Mock()
+ self.mock_log = Mock()
+ self.service = AgentService(client=self.mock_client, log=self.mock_log)
+
+ def test_get_agent_bioghist_data_success(self):
+ """Test successfully fetching agent bioghist data."""
+ # Mock agent response
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'abc123',
+ 'subnotes': [
+ {'content': 'First paragraph.\nSecond paragraph.'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/123')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(result['agent_name'], 'Test Agent')
+ self.assertEqual(result['persistent_id'], 'abc123')
+ self.assertEqual(len(result['paragraphs']), 2)
+ self.assertIn('First paragraph.', result['paragraphs'])
+ self.assertIn('Second paragraph.', result['paragraphs'])
+
+ def test_get_agent_bioghist_data_no_bioghist(self):
+ """Test fetching agent with no bioghist notes."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': []
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/123')
+
+ self.assertIsNone(result)
+
+ def test_get_agent_bioghist_data_with_list_content(self):
+ """Test handling subnote content as a list."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'xyz789',
+ 'subnotes': [
+ {'content': ['First item', 'Second item']}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/people/456')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(len(result['paragraphs']), 2)
+ self.assertIn('First item', result['paragraphs'])
+ self.assertIn('Second item', result['paragraphs'])
+
+ def test_get_agent_bioghist_data_filters_empty_lines(self):
+ """Test that empty lines are filtered out."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'def456',
+ 'subnotes': [
+ {'content': 'Line 1\n\n\nLine 2\n \nLine 3'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/families/789')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(len(result['paragraphs']), 3)
+ self.assertIn('Line 1', result['paragraphs'])
+ self.assertIn('Line 2', result['paragraphs'])
+ self.assertIn('Line 3', result['paragraphs'])
+
+ def test_get_agent_bioghist_data_missing_persistent_id(self):
+ """Test handling bioghist note without persistent_id."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ # No persistent_id
+ 'subnotes': [
+ {'content': 'Some content'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/999')
+
+ self.assertIsNotNone(result)
+ self.assertIsNone(result['persistent_id'])
+ # Should log error about missing persistent_id
+ self.mock_log.error.assert_called()
+ error_call = str(self.mock_log.error.call_args)
+ self.assertIn('ASSUMPTION VIOLATION', error_call)
+ self.assertIn('persistent_id', error_call)
+
+ def test_get_agent_bioghist_data_invalid_content_type(self):
+ """Test handling unexpected content type."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'ghi123',
+ 'subnotes': [
+ {'content': {'unexpected': 'dict'}} # Invalid type
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/111')
+
+ # Should return None when no valid paragraphs are extracted
+ self.assertIsNone(result)
+ # Should log error about unexpected type
+ self.mock_log.error.assert_called()
+ error_calls = [str(call) for call in self.mock_log.error.call_args_list]
+ error_text = ''.join(error_calls)
+ self.assertIn('ASSUMPTION VIOLATION', error_text)
+ self.assertIn('dict', error_text)
+
+ def test_get_agent_bioghist_data_uses_display_name_fallback(self):
+ """Test using display_name.sort_name when title is missing."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ # No 'title' field
+ 'display_name': {'sort_name': 'Fallback Name'},
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'jkl456',
+ 'subnotes': [
+ {'content': 'Some content'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/people/222')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(result['agent_name'], 'Fallback Name')
+
+ def test_get_agent_bioghist_data_handles_exception(self):
+ """Test handling exceptions during agent fetch."""
+ self.mock_client.get.side_effect = Exception('Network error')
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/333')
+
+ self.assertIsNone(result)
+ self.mock_log.error.assert_called()
+ error_call = str(self.mock_log.error.call_args)
+ self.assertIn('Network error', error_call)
+
+ def test_get_agent_bioghist_data_multiple_subnotes(self):
+ """Test handling multiple subnotes in a bioghist note."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'mno789',
+ 'subnotes': [
+ {'content': 'First subnote'},
+ {'content': 'Second subnote'},
+ {'content': 'Third subnote'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/families/444')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(len(result['paragraphs']), 3)
+ self.assertIn('First subnote', result['paragraphs'])
+ self.assertIn('Second subnote', result['paragraphs'])
+ self.assertIn('Third subnote', result['paragraphs'])
+
+ def test_get_agent_bioghist_data_returns_first_bioghist_only(self):
+ """Test that only the first bioghist note is returned."""
+ mock_response = Mock()
+ mock_response.json.return_value = {
+ 'title': 'Test Agent',
+ 'notes': [
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'first123',
+ 'subnotes': [
+ {'content': 'First bioghist'}
+ ]
+ },
+ {
+ 'jsonmodel_type': 'note_bioghist',
+ 'persistent_id': 'second456',
+ 'subnotes': [
+ {'content': 'Second bioghist'}
+ ]
+ }
+ ]
+ }
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.get_agent_bioghist_data('/agents/corporate_entities/555')
+
+ self.assertIsNotNone(result)
+ self.assertEqual(result['persistent_id'], 'first123')
+ self.assertIn('First bioghist', result['paragraphs'])
+ self.assertNotIn('Second bioghist', result['paragraphs'])
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/test_xml_transform_service.py b/tests/test_xml_transform_service.py
new file mode 100644
index 0000000..b49ec10
--- /dev/null
+++ b/tests/test_xml_transform_service.py
@@ -0,0 +1,506 @@
+"""
+Tests for XmlTransformService.
+"""
+
+import unittest
+from unittest.mock import Mock
+from arcflow.services.xml_transform_service import XmlTransformService
+
+# Real ArchivesSpace EAD fixture with namespace
+REAL_EAD_WITH_NAMESPACE = '''<?xml version="1.0" encoding="UTF-8"?>
+<ead xmlns="urn:isbn:1-931666-22-9">
+  <eadheader>
+    <eadid>test-collection</eadid>
+  </eadheader>
+  <archdesc level="collection">
+    <did>
+      <unittitle>Test Collection with Namespace</unittitle>
+      <origination label="creator">
+        <corpname>Test Corporation</corpname>
+      </origination>
+    </did>
+  </archdesc>
+</ead>'''
+
+# Real EAC-CPF fixture with namespace
+REAL_EAC_CPF_WITH_NAMESPACE = '''<?xml version="1.0" encoding="UTF-8"?>
+<eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">
+  <control>
+    <recordId>test-agent</recordId>
+  </control>
+  <cpfDescription>
+    <relations>
+      <resourceRelation resourceRelationType="creatorOf" xlink:href="/repositories/2/resources/123">
+        <relationEntry>Test Collection</relationEntry>
+      </resourceRelation>
+    </relations>
+  </cpfDescription>
+</eac-cpf>'''
+
+class TestXmlTransformService(unittest.TestCase):
+ """Test cases for XmlTransformService."""
+
+ def setUp(self):
+ """Set up test fixtures."""
+ self.mock_client = Mock()
+ self.mock_log = Mock()
+ self.service = XmlTransformService(client=self.mock_client, log=self.mock_log)
+
+ def test_add_creator_ids_to_ead(self):
+ """Test adding arcuit:creator_id attributes to origination elements."""
+
+ resource = {
+ 'linked_agents': [
+ {'role': 'creator', 'ref': '/agents/corporate_entities/123'}
+ ]
+ }
+
+ result = self.service.add_creator_ids_to_ead(REAL_EAD_WITH_NAMESPACE, resource)
+
+ # Should contain arcuit namespace declaration
+ self.assertIn('xmlns:arcuit', result)
+ self.assertIn('https://arcuit.library.illinois.edu/ead-extensions', result)
+ # Should contain the creator_id attribute
+ self.assertIn('creator_id="creator_corporate_entities_123"', result)
+ # Should preserve EAD namespace
+ self.assertIn('urn:isbn:1-931666-22-9', result)
+ # Should still find and modify the corpname element
+ self.assertIn('corpname', result)
+
+ def test_add_creator_ids_multiple_creators(self):
+ """Test adding arcuit:creator_id to multiple origination elements."""
+        xml_content = '''<ead><archdesc><did>
+            <origination label="creator">
+                <corpname>First Corp</corpname>
+            </origination>
+            <origination label="creator">
+                <persname>Second Person</persname>
+            </origination>
+        </did></archdesc></ead>'''
+
+ resource = {
+ 'linked_agents': [
+ {'role': 'creator', 'ref': '/agents/corporate_entities/123'},
+ {'role': 'creator', 'ref': '/agents/people/456'}
+ ]
+ }
+
+ result = self.service.add_creator_ids_to_ead(xml_content, resource)
+
+ self.assertIn('creator_id="creator_corporate_entities_123"', result)
+ self.assertIn('creator_id="creator_people_456"', result)
+ self.assertIn('xmlns:arcuit', result)
+
+ def test_add_creator_ids_no_creators(self):
+ """Test that XML is unchanged when there are no creators."""
+        xml_content = '<ead><archdesc><did><unittitle>Test</unittitle></did></archdesc></ead>'
+ resource = {'linked_agents': []}
+
+ result = self.service.add_creator_ids_to_ead(xml_content, resource)
+
+ self.assertEqual(xml_content, result)
+
+ def test_inject_collection_metadata_with_all_fields(self):
+ """Test injecting record group, subgroup, and bioghist."""
+        xml_content = '''<?xml version="1.0" encoding="UTF-8"?>
+<ead xmlns="urn:isbn:1-931666-22-9">
+    <archdesc level="collection">
+        <did>
+            <unittitle>Test Collection</unittitle>
+        </did>
+    </archdesc>
+</ead>'''
+
+ result = self.service.inject_collection_metadata(
+ xml_content,
+ record_group='RG 1 — Test Group',
+ subgroup='SG 1.1 — Test Subgroup',
+            bioghist_content='<p>Test bioghist</p>'
+ )
+
+ # Should add recordgroup with namespace
+ self.assertIn('recordgroup', result)
+ self.assertIn('RG 1 — Test Group', result)
+ # Should add subgroup with namespace
+ self.assertIn('subgroup', result)
+ self.assertIn('SG 1.1 — Test Subgroup', result)
+ # Should add bioghist with EAD namespace
+ self.assertIn('bioghist', result)
+ self.assertIn('Test bioghist', result)
+ # Should preserve original namespace
+ self.assertIn('xmlns', result)
+ self.assertIn('urn:isbn:1-931666-22-9', result)
+
+ def test_inject_collection_metadata_into_existing_bioghist(self):
+ """Test that bioghist content is inserted into existing bioghist element."""
+        xml_content = '''<ead xmlns="urn:isbn:1-931666-22-9">
+    <archdesc level="collection">
+        <did>
+            <unittitle>Test Collection</unittitle>
+        </did>
+        <bioghist>
+            <p>Existing content</p>
+        </bioghist>
+    </archdesc>
+</ead>'''
+
+ result = self.service.inject_collection_metadata(
+ xml_content,
+ record_group=None,
+ subgroup=None,
+            bioghist_content='<p>New content</p>'
+ )
+
+        # Should insert new content before the closing </bioghist> tag
+        self.assertIn('Existing content', result)
+        self.assertIn('New content', result)
+        # Should not create a new bioghist wrapper
+        self.assertEqual(result.count('<p>'), 2)  # Original + inserted
+
+ def test_inject_collection_metadata_xml_escaping(self):
+ """Test that special XML characters are properly escaped."""
+        xml_content = '''<ead xmlns="urn:isbn:1-931666-22-9">
+    <archdesc level="collection">
+        <did>
+            <unittitle>Test</unittitle>
+        </did>
+    </archdesc>
+</ead>'''
+
+        result = self.service.inject_collection_metadata(
+            xml_content,
+            record_group='Group & Co <test>',
+            subgroup=None,
+            bioghist_content=None
+        )
+
+        self.assertIn('Group &amp; Co &lt;test&gt;', result)
+        self.assertNotIn('Group & Co <test>', result)
+
+ def test_add_collection_links_to_eac_cpf(self):
+ """Test adding ead_id descriptiveNote to resourceRelation elements."""
+
+ # Mock the client response
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {'ead_id': 'TEST.1.2.3'}
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.add_collection_links_to_eac_cpf(REAL_EAC_CPF_WITH_NAMESPACE)
+
+ # Should add descriptiveNote (namespace-aware check)
+ self.assertIn('descriptiveNote', result)
+ self.assertIn('ead_id:TEST.1.2.3', result)
+ # Should preserve EAC-CPF namespace
+ self.assertIn('urn:isbn:1-931666-33-4', result)
+
+ def test_multiple_creators_with_namespace(self):
+ """Test handling multiple creators when EAD has default namespace."""
+        xml_with_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+<ead xmlns="urn:isbn:1-931666-22-9">
+    <archdesc level="collection">
+        <did>
+            <origination label="creator">
+                <corpname>First Corp</corpname>
+            </origination>
+            <origination label="creator">
+                <persname>Second Person</persname>
+            </origination>
+        </did>
+    </archdesc>
+</ead>'''
+
+ resource = {
+ 'linked_agents': [
+ {'role': 'creator', 'ref': '/agents/corporate_entities/123'},
+ {'role': 'creator', 'ref': '/agents/people/456'}
+ ]
+ }
+
+ result = self.service.add_creator_ids_to_ead(xml_with_namespace, resource)
+
+ # Should add both creator IDs
+ self.assertIn('creator_id="creator_corporate_entities_123"', result)
+ self.assertIn('creator_id="creator_people_456"', result)
+ # Should preserve namespace
+ self.assertIn('urn:isbn:1-931666-22-9', result)
+
+ def test_add_collection_links_idempotent(self):
+ """Test that adding collection links is idempotent."""
+        eac_cpf_xml = '''<eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">
+            <resourceRelation resourceRelationType="creatorOf" xlink:href="/repositories/2/resources/123">
+                <relationEntry>Test Collection</relationEntry>
+                <descriptiveNote>
+                    <p>ead_id:TEST.1.2.3</p>
+                </descriptiveNote>
+            </resourceRelation>
+</eac-cpf>'''
+
+ result = self.service.add_collection_links_to_eac_cpf(eac_cpf_xml)
+
+ # Should not call the client since descriptiveNote already exists
+ self.mock_client.get.assert_not_called()
+ # Should return unchanged XML
+ self.assertEqual(eac_cpf_xml, result)
+
+ def test_add_collection_links_skips_digital_objects(self):
+ """Test that digital object URLs are skipped silently."""
+        eac_cpf_xml = '''<eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">
+            <resourceRelation resourceRelationType="creatorOf" xlink:href="/repositories/2/digital_objects/1">
+                <relationEntry>Test Digital Object</relationEntry>
+            </resourceRelation>
+</eac-cpf>'''
+
+ result = self.service.add_collection_links_to_eac_cpf(eac_cpf_xml)
+
+ # Should not call the client
+ self.mock_client.get.assert_not_called()
+ # Should return unchanged XML
+ self.assertEqual(eac_cpf_xml, result)
+
+ def test_add_collection_links_handles_fetch_errors(self):
+ """Test that fetch errors are handled gracefully."""
+        eac_cpf_xml = '''<eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">
+            <resourceRelation resourceRelationType="creatorOf" xlink:href="/repositories/2/resources/999">
+                <relationEntry>Test Collection</relationEntry>
+            </resourceRelation>
+</eac-cpf>'''
+
+ # Mock a 404 response
+ mock_response = Mock()
+ mock_response.status_code = 404
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.add_collection_links_to_eac_cpf(eac_cpf_xml)
+
+ # Should log a warning
+ self.mock_log.warning.assert_called()
+ # Should return unchanged XML
+        self.assertNotIn('<descriptiveNote>', result)
+
+ def test_build_bioghist_element(self):
+ """Test building bioghist XML element from structured data."""
+ result = self.service.build_bioghist_element(
+ agent_name='Test Agent',
+ persistent_id='abc123',
+ paragraphs=['First paragraph', 'Second paragraph']
+ )
+
+        self.assertIn('<bioghist id="abc123">', result)
+        self.assertIn('Historical Note from Test Agent Creator Record', result)
+        self.assertIn('<p>First paragraph</p>', result)
+        self.assertIn('<p>Second paragraph</p>', result)
+        self.assertIn('</bioghist>', result)
+
+ def test_build_bioghist_element_without_persistent_id(self):
+ """Test building bioghist without persistent_id."""
+ result = self.service.build_bioghist_element(
+ agent_name='Test Agent',
+ persistent_id=None,
+ paragraphs=['Content']
+ )
+
+        self.assertIn('<bioghist>', result)
+        self.assertNotIn('id=', result)
+        self.assertIn('<p>Content</p>', result)
+
+ def test_build_bioghist_element_escapes_agent_name(self):
+ """Test that agent name is properly XML-escaped."""
+        result = self.service.build_bioghist_element(
+            agent_name='Agent & Co <test>',
+            persistent_id='abc',
+            paragraphs=['Content']
+        )
+
+        self.assertIn('Agent &amp; Co &lt;test&gt;', result)
+
+ def test_build_bioghist_element_escapes_paragraph_content(self):
+ """Test that paragraph content with special XML characters is properly escaped."""
+        result = self.service.build_bioghist_element(
+            agent_name='Test Agent',
+            persistent_id='abc',
+            paragraphs=['Content with & ampersand', 'Content with <tags> and "quotes"']
+        )
+
+        self.assertIn('<p>Content with &amp; ampersand</p>', result)
+        self.assertIn('<p>Content with &lt;tags&gt; and "quotes"</p>', result)
+
+ def test_validate_eac_cpf_xml_valid(self):
+ """Test validating valid EAC-CPF XML."""
+        eac_cpf_xml = '<eac-cpf></eac-cpf>'
+
+ root = self.service.validate_eac_cpf_xml(eac_cpf_xml, '/agents/corporate_entities/123')
+
+ self.assertIsNotNone(root)
+ self.assertEqual(root.tag, 'eac-cpf')
+
+ def test_validate_eac_cpf_xml_invalid(self):
+ """Test validating invalid EAC-CPF XML."""
+        eac_cpf_xml = '<eac-cpf><control>' # Missing closing tags
+
+ root = self.service.validate_eac_cpf_xml(eac_cpf_xml, '/agents/corporate_entities/123')
+
+ self.assertIsNone(root)
+ self.mock_log.error.assert_called()
+
+ def test_add_collection_links_requires_client(self):
+ """Test that add_collection_links_to_eac_cpf requires a client."""
+ service_no_client = XmlTransformService(client=None)
+
+ with self.assertRaises(ValueError) as context:
+ service_no_client.add_collection_links_to_eac_cpf('')
+
+ self.assertIn('Client is required', str(context.exception))
+
+ def test_namespace_preservation_ead_with_declaration(self):
+ """Test that EAD namespace prefixes and XML declaration are preserved."""
+        xml_input = '''<?xml version="1.0" encoding="UTF-8"?>
+<ead xmlns="urn:isbn:1-931666-22-9">
+    <eadheader>
+        <eadid>test-collection</eadid>
+    </eadheader>
+    <archdesc level="collection">
+        <did>
+            <unittitle>Test Collection</unittitle>
+            <origination label="creator">
+                <corpname>Test Corporation</corpname>
+            </origination>
+        </did>
+    </archdesc>
+</ead>'''
+
+ resource = {
+ 'linked_agents': [
+ {'role': 'creator', 'ref': '/agents/corporate_entities/123'}
+ ]
+ }
+
+ result = self.service.add_creator_ids_to_ead(xml_input, resource)
+
+        # Should have XML declaration
+        self.assertTrue(result.startswith('<?xml'))
+
+    def test_namespace_preservation_eac_cpf_with_declaration(self):
+        """Test that EAC-CPF namespaces and XML declaration are preserved."""
+        xml_input = '''<?xml version="1.0" encoding="UTF-8"?>
+<eac-cpf xmlns="urn:isbn:1-931666-33-4" xmlns:xlink="http://www.w3.org/1999/xlink">
+    <control>
+        <recordId>test-agent</recordId>
+    </control>
+    <cpfDescription>
+        <relations>
+            <resourceRelation resourceRelationType="creatorOf" xlink:href="/repositories/2/resources/1">
+                <relationEntry>Test Collection</relationEntry>
+            </resourceRelation>
+        </relations>
+    </cpfDescription>
+</eac-cpf>'''
+
+ # Mock the client response
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = {'ead_id': 'TEST.1.2.3'}
+ self.mock_client.get.return_value = mock_response
+
+ result = self.service.add_collection_links_to_eac_cpf(xml_input)
+
+        # Should have XML declaration
+        self.assertTrue(result.startswith('<?xml'))
+
+    def test_inject_collection_metadata_preserves_declaration(self):
+        """Test that the XML declaration survives metadata injection."""
+        xml_input = '''<?xml version="1.0" encoding="UTF-8"?>
+<ead xmlns="urn:isbn:1-931666-22-9">
+    <eadheader>
+        <eadid>test-collection</eadid>
+    </eadheader>
+    <archdesc level="collection">
+        <did>
+            <unittitle>Test Collection</unittitle>
+        </did>
+    </archdesc>
+</ead>'''
+
+        bioghist_content = '''<bioghist>
+  <head>Historical Note from Test Agent Creator Record</head>
+  <p>Test paragraph</p>
+</bioghist>'''
+
+ result = self.service.inject_collection_metadata(
+ xml_input,
+ record_group="Test Group",
+ subgroup="Test Subgroup",
+ bioghist_content=bioghist_content
+ )
+
+        # Should have XML declaration
+        self.assertTrue(result.startswith('<?xml'))
+        self.assertIn('<recordgroup>', result)
+        self.assertIn('<subgroup>', result)
+        self.assertIn('<bioghist>', result)
+
+    def test_no_declaration_added_when_original_lacks_one(self):
+        """Test that no XML declaration is invented for input that had none."""
+        xml_input = '''<eac-cpf xmlns="urn:isbn:1-931666-33-4">
+    <control>
+        <recordId>test-agent</recordId>
+    </control>
+</eac-cpf>'''
+
+ # No changes will be made (no resourceRelations)
+ result = self.service.add_collection_links_to_eac_cpf(xml_input)
+
+ # Should not add XML declaration when original didn't have one and no changes made
+ self.assertEqual(xml_input, result, 'Unchanged XML should be returned as-is')
+        self.assertFalse(result.startswith('<?xml'))