import logging
import os.path
from ndex2.cx2 import CX2Network, RawCX2NetworkFactory
import ndex2.constants as constants
from cellmaps_utils import hcx_utils
from cellmaps_utils.exceptions import CellMapsError
logger = logging.getLogger(__name__)
[docs]
class InteractomeToDDOTConverter:
    """
    Holds the input and output locations for turning a CX2 interactome into a DDOT formatted file.
    """

    def __init__(self, output_dir, interactome_path, ddot_file_name='interactome_ddot.txt'):
        """
        Records where the interactome is read from and where the DDOT output is to be written.

        :param output_dir: Directory that will contain the DDOT output file
        :type output_dir: str
        :param interactome_path: Location of the interactome file (CX2 format)
        :type interactome_path: str
        :param ddot_file_name: File name, inside ``output_dir``, of the DDOT formatted output
        :type ddot_file_name: str
        """
        # The output location is always ``output_dir`` joined with the requested file name.
        ddot_destination = os.path.join(output_dir, ddot_file_name)
        self._output_file = ddot_destination
        self._interactome_path = interactome_path
[docs]
class DDOTToInteractomeConverter:
    """
    Converts a tab-delimited DDOT edge list into an interactome saved in CX2 format.
    """

    def __init__(self, output_dir, interactome_ddot_path, interactome_file_name='interactome.cx2'):
        """
        Initializes the converter to transform a DDOT formatted file to a CX2 format.

        :param output_dir: Directory where the output file will be saved
        :type output_dir: str
        :param interactome_ddot_path: Path to the DDOT formatted file
        :type interactome_ddot_path: str
        :param interactome_file_name: Name of the output file for the interactome
        :type interactome_file_name: str
        """
        self._interactome_ddot_path = interactome_ddot_path
        self._output_file = os.path.join(output_dir, interactome_file_name)

    @staticmethod
    def _parse_ddot_line(line):
        """
        Splits one line of a DDOT edge list into its columns.

        :param line: Raw line read from the DDOT file
        :type line: str
        :return: ``(source, target, interaction)`` where ``interaction`` is ``None`` when the
                 third column is absent or empty, or ``None`` for a blank line
        :rtype: tuple or None
        :raises IndexError: If a non-blank line has fewer than two tab-separated columns
        """
        fields = line.strip().split('\t')
        if fields == ['']:
            # Blank (or whitespace-only) line, typically a trailing newline at end of file.
            return None
        # The guard and the index must agree: with three columns the interaction is fields[2].
        interaction = fields[2] if len(fields) >= 3 and fields[2] else None
        return fields[0], fields[1], interaction

    @staticmethod
    def _get_or_add_node(network, name):
        """
        Returns the id of the node called **name**, adding the node first if it does not exist.

        :param network: Network being built
        :type network: :py:class:`~ndex2.cx2.CX2Network`
        :param name: Node name
        :type name: str
        :return: id of the existing or newly added node
        """
        node_id = network.lookup_node_id_by_name(name)
        if node_id is None:
            node_id = network.add_node(attributes={'name': name})
        return node_id

    def generate_interactome_file(self):
        """
        Converts a DDOT formatted file to an interactome CX2 network file. It parses the DDOT file,
        constructs nodes and edges in the interactome, and saves the output.

        Each non-blank line is expected to hold ``source<TAB>target`` with an optional third
        column that is stored as the edge interaction. Blank lines are ignored.

        :raises IndexError: If a non-blank line has fewer than two tab-separated columns
        """
        interactome = CX2Network()
        with open(self._interactome_ddot_path, 'r') as f:
            for line in f:
                parsed = self._parse_ddot_line(line)
                if parsed is None:
                    continue
                source_name, target_name, interaction = parsed
                # Source is resolved before target so a self-loop reuses the node just added.
                source_node = self._get_or_add_node(interactome, source_name)
                target_node = self._get_or_add_node(interactome, target_name)
                id_edge = interactome.add_edge(source=source_node, target=target_node)
                if interaction is not None:
                    interactome.add_edge_attribute(id_edge, constants.EDGE_INTERACTION_EXPANDED, interaction)
        interactome.write_as_raw_cx2(self._output_file)
[docs]
class HierarchyToDDOTConverter:
    """
    Converts a hierarchy stored in CX2 format into a tab-delimited DDOT ontology file.
    """

    def __init__(self, output_dir, hierarchy_path, ontology_file_name='ontology.ont'):
        """
        Initializes the converter to transform a hierarchy data file in CX2 format into a DDOT ontology file.

        :param output_dir: Directory where the output ontology file will be saved
        :type output_dir: str
        :param hierarchy_path: Path to the hierarchy data file
        :type hierarchy_path: str
        :param ontology_file_name: Name of the output ontology file
        :type ontology_file_name: str
        """
        self._hierarchy_path = hierarchy_path
        self._output_file = os.path.join(output_dir, ontology_file_name)

    @staticmethod
    def _split_members(member_list):
        """
        Splits a space-delimited ``CD_MemberList`` value into gene names, dropping empty tokens.

        :param member_list: Space-delimited gene names; may be empty or ``None``
        :type member_list: str
        :return: Gene names in their original order
        :rtype: list
        """
        if not member_list:
            return []
        # ''.split(' ') is [''] and repeated spaces give '' tokens; neither is a gene.
        return [gene for gene in member_list.split(' ') if gene]

    def generate_ontology_ddot_file(self):
        """
        Converts a hierarchy network file into a DDOT ontology file format. This method extracts hierarchy
        edges and node information to create an ontology representation suitable for DDOT applications.

        Every edge is written as ``source<TAB>target<TAB>default`` and every gene found in a node's
        ``CD_MemberList`` attribute as ``node<TAB>gene<TAB>gene``. Nodes without a name attribute
        are written using their node id. Nodes with no members contribute no gene lines.
        """
        factory = RawCX2NetworkFactory()
        hierarchy = factory.get_cx2network(self._hierarchy_path)
        with open(self._output_file, 'w') as f:
            for edge_values in hierarchy.get_edges().values():
                source_id = edge_values[constants.EDGE_SOURCE]
                source = hierarchy.get_node(source_id)[constants.ASPECT_VALUES].get(constants.NODE_NAME_EXPANDED,
                                                                                    source_id)
                target_id = edge_values[constants.EDGE_TARGET]
                target = hierarchy.get_node(target_id)[constants.ASPECT_VALUES].get(constants.NODE_NAME_EXPANDED,
                                                                                    target_id)
                f.write(f"{source}\t{target}\tdefault\n")
            for node_id, node_values in hierarchy.get_nodes().items():
                node_attr = node_values[constants.ASPECT_VALUES]
                node = node_attr.get(constants.NODE_NAME_EXPANDED, node_id)
                for gene in self._split_members(node_attr.get('CD_MemberList', '')):
                    f.write(f"{node}\t{gene}\tgene\n")
[docs]
class DDOTToHierarchyConverter:
    """
    Builds a hierarchy in CX2 format from a DDOT ontology file and a parent interactome.
    """

    def __init__(self, output_dir, ontology_ddot_path, parent_ddot_path=None, parent_ndex_url=None,
                 host='ndexbio.org', parent_uuid=None, ndex_user=None, ndex_password=None,
                 hierarchy_filename='hierarchy.cx2', interactome_filename='hierarchy_parent.cx2'):
        """
        Initializes the converter to create a hierarchy in CX2 from DDOT ontology file.

        :param output_dir: Directory where the output files will be saved
        :type output_dir: str
        :param ontology_ddot_path: Path to the DDOT formatted ontology file
        :type ontology_ddot_path: str
        :param parent_ddot_path: Optional path to the parent interactome DDOT file
        :type parent_ddot_path: str, optional
        :param parent_ndex_url: Optional URL to the parent interactome in NDEx
        :type parent_ndex_url: str, optional
        :param host: Hostname for the NDEx server
        :type host: str
        :param parent_uuid: UUID of the parent network in NDEx
        :type parent_uuid: str, optional
        :param ndex_user: Username for NDEx server authentication
        :type ndex_user: str, optional
        :param ndex_password: Password for NDEx server authentication
        :type ndex_password: str, optional
        :param hierarchy_filename: Name for the output hierarchy file
        :type hierarchy_filename: str
        :param interactome_filename: Name for the output interactome file (parent network)
        :type interactome_filename: str
        :raises CellMapsError: If neither **parent_ndex_url** nor **parent_ddot_path** is given, or if
                               **parent_ddot_path** is combined with **parent_ndex_url** or **parent_uuid**
        """
        self._output_dir = output_dir
        self._hierarchy_file = os.path.join(output_dir, hierarchy_filename)
        self._interactome_filename = interactome_filename
        self._ontology_ddot_path = ontology_ddot_path
        if parent_ndex_url is None and parent_ddot_path is None:
            raise CellMapsError("Specifying either url of parent interactome in NDEx, or edge list of interactome"
                                " is required!")
        if parent_ddot_path is not None and (parent_ndex_url is not None or parent_uuid is not None):
            raise CellMapsError("You need to specify parent network as edge list in file, or as network in NDEx, "
                                "not both!")
        # NOTE(review): the URL is validated and stored here, but the methods of this class only
        # pass host and uuid on to hcx_utils -- confirm the URL is consumed elsewhere.
        self._parent_url = parent_ndex_url
        self._parent_edgelist = parent_ddot_path
        self._host = host
        self._uuid = parent_uuid
        self._username = ndex_user
        self._password = ndex_password

    def generate_hierarchy_hcx_file(self):
        """
        Constructs a hierarchy network from a DDOT formatted ontology file, utilizing a parent interactome.
        The hierarchy is enriched with hierarchical context (HCX) information and saved as CX2.

        The interactome is obtained through ``hcx_utils.get_interactome`` and the network level
        annotations are added by ``hcx_utils.add_hcx_network_annotations``.
        """
        interactome = hcx_utils.get_interactome(self._host, self._uuid, self._username, self._password,
                                                self._parent_edgelist)
        hierarchy = self._get_hierarchy(interactome)
        hcx_utils.add_hcx_network_annotations(hierarchy, interactome, self._output_dir, self._interactome_filename,
                                              self._host, self._uuid)
        hierarchy.write_as_raw_cx2(self._hierarchy_file)

    @staticmethod
    def _parse_ontology_line(line, line_number=None):
        """
        Splits one line of a DDOT ontology file into ``(source, target, edge_type)``.

        :param line: Raw line read from the ontology file
        :type line: str
        :param line_number: 1-based line number, used only in the error message
        :type line_number: int, optional
        :return: The three columns, or ``None`` for a blank line
        :rtype: tuple or None
        :raises ValueError: If a non-blank line does not have exactly three tab-separated columns
        """
        stripped = line.strip()
        if not stripped:
            return None
        fields = stripped.split('\t')
        if len(fields) != 3:
            location = '' if line_number is None else f' on line {line_number}'
            raise ValueError(f"Expected 3 tab-separated columns{location} of DDOT ontology file, "
                             f"got {len(fields)}: {stripped!r}")
        return fields[0], fields[1], fields[2]

    @staticmethod
    def _split_members(member_list):
        """
        Splits a space-delimited ``CD_MemberList`` value into gene names, dropping empty tokens.

        :param member_list: Space-delimited gene names; may be empty or ``None``
        :type member_list: str
        :return: Gene names in their original order
        :rtype: list
        """
        if not member_list:
            return []
        # ''.split(' ') is [''], which would otherwise leak an empty "gene" into ancestors.
        return [gene for gene in member_list.split(' ') if gene]

    @staticmethod
    def _get_or_add_node(hierarchy, name):
        """
        Returns the id of the hierarchy node called **name**, adding the node if it does not exist.

        :param hierarchy: Hierarchy being built
        :type hierarchy: :py:class:`~ndex2.cx2.CX2Network`
        :param name: Term name
        :type name: str
        :return: id of the existing or newly added node
        """
        node_id = hierarchy.lookup_node_id_by_name(name)
        if node_id is None:
            node_id = hierarchy.add_node(attributes={constants.NODE_NAME_EXPANDED: name})
        return node_id

    def _get_hierarchy(self, interactome):
        """
        Constructs the hierarchy based on the DDOT edge list and node list and uses interactome to add HCX information.

        Lines whose type contains ``gene`` (case-insensitive) assign a gene to a term; every other
        line adds a term-to-term edge. Terms are created the first time they are mentioned, so gene
        lines are honoured regardless of where they appear in the file. Blank lines are skipped.

        :param interactome: The interactome network.
        :type interactome: CX2Network
        :return: A CX2Network object representing the hierarchy.
        :rtype: CX2Network
        :raises ValueError: If a non-blank line does not have exactly three tab-separated columns
        """
        hierarchy = CX2Network()
        # term node id -> genes directly assigned to it (dict keys keep file order, no duplicates)
        direct_genes = {}
        with open(self._ontology_ddot_path, 'r') as f:
            for line_number, line in enumerate(f, start=1):
                parsed = self._parse_ontology_line(line, line_number)
                if parsed is None:
                    continue
                source, target, edge_type = parsed
                source_id = self._get_or_add_node(hierarchy, source)
                if 'gene' in edge_type.lower():
                    if target:
                        direct_genes.setdefault(source_id, {})[target] = None
                else:
                    target_id = self._get_or_add_node(hierarchy, target)
                    hierarchy.add_edge(source=source_id, target=target_id)
        for term_id, genes in direct_genes.items():
            hierarchy.add_node_attribute(term_id, 'CD_MemberList', ' '.join(genes))
        # Sets CD_MemberList and HCX::members on every node from its own and its descendants' genes.
        self._update_memberlist(hierarchy=hierarchy, interactome=interactome)
        root_nodes = hcx_utils.get_root_nodes(hierarchy)
        hcx_utils.add_isroot_node_attribute(hierarchy, root_nodes=root_nodes)
        return hierarchy

    def _update_memberlist(self, hierarchy=None, interactome=None):
        """
        Iterates through all nodes of network updating the CD_MemberList and HCX::members by
        aggregating all the gene lists from the node and its children

        :param hierarchy: Hierarchy whose nodes are updated in place
        :type hierarchy: :py:class:`~ndex2.cx2.CX2Network`
        :param interactome: Network used to translate gene names into node ids
        :type interactome: :py:class:`~ndex2.cx2.CX2Network`
        """
        # Edges do not change during the update, so the adjacency map is built once.
        child_map = self._build_child_map(hierarchy)
        for node_id in hierarchy.get_nodes():
            subtree = self._get_children_of_node(hierarchy, node_id, child_map=child_map)
            subtree.add(node_id)
            member_list, member_ids = self._get_memberlists_from_nodes(hierarchy, interactome, subtree)
            hierarchy.add_node_attribute(node_id, 'CD_MemberList', ' '.join(member_list))
            hierarchy.add_node_attribute(node_id, 'HCX::members', member_ids)

    @staticmethod
    def _build_child_map(network):
        """
        Builds a mapping of edge source id to the list of its edge target ids.

        :param network: Network whose edges are read
        :type network: :py:class:`~ndex2.cx2.CX2Network`
        :return: source node id -> list of target node ids
        :rtype: dict
        """
        child_map = {}
        for edge_obj in network.get_edges().values():
            child_map.setdefault(edge_obj[constants.EDGE_SOURCE], []).append(edge_obj[constants.EDGE_TARGET])
        return child_map

    @staticmethod
    def _collect_descendants(child_map, node_id):
        """
        Returns every node reachable from **node_id** by following **child_map**.

        Each node is expanded at most once, so the walk terminates on cyclic input and does not
        repeat work for descendants shared by several parents.

        :param child_map: source node id -> iterable of target node ids
        :type child_map: dict
        :param node_id: node whose descendants should be found
        :return: ids of all descendants; contains **node_id** only if it is reachable from itself
        :rtype: set
        """
        descendants = set()
        pending = list(child_map.get(node_id, ()))
        while pending:
            current = pending.pop()
            if current in descendants:
                continue
            descendants.add(current)
            pending.extend(child_map.get(current, ()))
        return descendants

    def _get_children_of_node(self, network=None, node_id=None, child_map=None):
        """
        Gets children of node as set. It is assumed the child node is the target
        of the edge. Children of children are included as well.

        :param network: Network whose edges define the parent/child relation
        :type network: :py:class:`~ndex2.cx2.CX2Network`
        :param node_id: node whose children should be found
        :param child_map: Optional precomputed source id -> target ids mapping; when omitted it is
                          built from **network**
        :type child_map: dict, optional
        :return: id of children nodes
        :rtype: set
        """
        if child_map is None:
            child_map = self._build_child_map(network)
        return self._collect_descendants(child_map, node_id)

    @staticmethod
    def _get_memberlists_from_nodes(hierarchy=None, interactome=None, node_set=None):
        """
        Gets the genes from all nodes in **node_set** along with their node ids in the interactome.

        Gene names are de-duplicated and returned sorted so output is reproducible. Genes that
        cannot be found in the interactome are left out of the id list.

        :param hierarchy: Hierarchy whose ``CD_MemberList`` node attributes are read
        :type hierarchy: :py:class:`~ndex2.cx2.CX2Network`
        :param interactome: Network used to translate gene names into node ids
        :type interactome: :py:class:`~ndex2.cx2.CX2Network`
        :param node_set: ids of the hierarchy nodes to aggregate
        :type node_set: set
        :return: tuple of (sorted gene names, interactome node ids of those genes)
        :rtype: tuple
        """
        gene_set = set()
        for node_id in node_set:
            member_list = hierarchy.get_node(node_id).get(constants.ASPECT_VALUES, {}).get('CD_MemberList', None)
            if member_list is None:
                continue
            gene_set.update(DDOTToHierarchyConverter._split_members(member_list))
        genes = sorted(gene_set)
        member_ids = []
        seen_ids = set()
        for gene in genes:
            gene_id = interactome.lookup_node_id_by_name(gene)
            # A gene missing from the interactome has no id; None must not be stored as a member.
            if gene_id is None or gene_id in seen_ids:
                continue
            seen_ids.add(gene_id)
            member_ids.append(gene_id)
        return genes, member_ids