# -*- coding: utf-8 -*-
# Copyright (C) 2017-2020 IRISA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# The original code contained here was initially developed by:
#
# Pierre Vignet.
# IRISA
# Dyliss team
# IRISA Campus de Beaulieu
# 35042 RENNES Cedex, FRANCE
"""
Display, compare, and query a model
"""
from __future__ import unicode_literals
from __future__ import print_function
# Standard imports
from collections import defaultdict
import json
import csv
from logging import DEBUG
from urllib import quote as urllib_quote
import networkx as nx
import networkx.algorithms.isomorphism as iso
# Library imports
from cadbiom.models.biosignal.sig_expr import *
from cadbiom.models.guard_transitions.translators.chart_xml import MakeModelFromXmlFile
from cadbiom.models.guard_transitions.analyser.static_analysis import StaticAnalyzer
from tools.models import Reporter
from tools.models import (
get_transitions,
get_frontier_places,
get_model_identifier_mapping,
)
from tools.models import get_places_data
from tools.graphs import (
build_graph,
get_json_graph,
export_graph,
get_solutions_graph_data,
)
import cadbiom.commons as cm
# Module-wide logger shared by every entry point in this module
LOGGER = cm.logger()
def graph_isomorph_test(
    model_file_1, model_file_2, output_dir="graphs/", make_graphs=False, make_json=False
):
    """Entry point for model consistency checking.

    Build one graph per model (simulating a solution that fires every event)
    and compare the two graphs for isomorphism three times: raw topology,
    topology + node ``color`` attributes, topology + edge ``color`` attributes.

    .. note:: See networkx graphmatcher / categorical matches:
        https://networkx.github.io/documentation/development/reference/generated/networkx.algorithms.isomorphism.categorical_edge_match.html

    :param model_file_1: Filepath of the first model.
    :param model_file_2: Filepath of the second model.
    :key output_dir: Output path.
    :key make_graphs: If True, make a GraphML file in output path.
    :key make_json: If True, make a JSON dump of results in output path.
    :type model_file_1: <str>
    :type model_file_2: <str>
    :type output_dir: <str>
    :type make_graphs: <boolean>
    :type make_json: <boolean>
    :return: Dictionary with the results of tests.
        keys: 'topology', 'nodes', 'edges'; values: booleans
    :rtype: <dict <str>: <boolean>>
    """
    graphs = []
    frontiers = []
    build_results = []

    # Process both models the same way: parse, collect transitions and
    # frontier places, then build the corresponding graph.
    # Transitions structure format:
    # {u'h00': [('Ax', 'n1', {u'label': u'h00[]'}),]
    for index, model_file in enumerate((model_file_1, model_file_2), 1):
        parser = MakeModelFromXmlFile(model_file)
        transitions = get_transitions(parser)
        # All nodes of the model; frontier places are computed from this full
        # list (not from input_places - output_places) because some nodes
        # appear only in conditions and would otherwise be missing when
        # computing valid paths from conditions.
        all_places = parser.handler.node_dict.keys()
        frontier = get_frontier_places(transitions, all_places)
        if LOGGER.getEffectiveLevel() == DEBUG:
            LOGGER.debug("Frontier places %s: %s", index, sorted(frontier))

        # All events of the model are given as a single list of steps,
        # simulating a cadbiom solution containing every event.
        result = build_graph(frontier, [transitions.keys()], transitions)
        frontiers.append(frontier)
        build_results.append(result)
        graphs.append(result[0])

    g_first, g_second = graphs
    node_matcher = iso.categorical_node_match("color", "grey")
    edge_matcher = iso.categorical_edge_match("color", "")
    check_state = {
        "topology": nx.is_isomorphic(g_first, g_second),
        "nodes": nx.is_isomorphic(g_first, g_second, node_match=node_matcher),
        "edges": nx.is_isomorphic(g_first, g_second, edge_match=edge_matcher),
    }
    for check_name in ("Topology", "Nodes", "Edges"):
        LOGGER.info("%s checking: %s", check_name, check_state[check_name.lower()])

    # Optional GraphML exports (first/second suffixes keep the files apart)
    if make_graphs:
        for suffix, frontier, result in zip(
            ("first", "second"), frontiers, build_results
        ):
            export_graph(output_dir, frontier, suffix, *result)

    # Optional JSON dump of the comparison results
    if make_json:
        with open(output_dir + "graph_isomorphic_test.json", "w") as fd:
            fd.write(json.dumps(check_state, sort_keys=True, indent=4) + "\n")

    return check_state
def low_graph_info(model_file, graph_data=False, centralities=False):
    """Low level function for :meth:`~cadbiom_cmd.models.model_graph`.

    Get JSON data with information about the graph based on the model.

    .. seealso:: :meth:`tools.graphs.get_solutions_graph_data`.

    :param model_file: File for the model.
    :key graph_data: Also return a dictionary with the results of measures on
        the given graph.
        keys: measure's name; values: measure's value

        Example:

        .. code-block:: javascript

            {
                'modelFile': 'string',
                'modelName': 'string',
                'events': int,
                'entities': int,
                'boundaries': int,
                'transitions': int,
                'graph_nodes': int,
                'graph_edges': int,
                'centralities': {
                    'degree': {'entity_1': float, 'entity_2': float},
                },
                'strongly_connected': boolean,
                'weakly_connected': boolean,
                'max_degree': int,
                'min_degree': int,
                'average_degree': float,
                'connected_components_number': int,
                'connected_components': list,
                'average_shortest_paths': int,
            }

    :key centralities: If True, compute centralities
        (degree, closeness, betweenness).
    :type model_file: <str>
    :type graph_data: <boolean>
    :type centralities: <boolean>
    :return: Tuple of tuples from :meth:`tools.graphs.build_graph`, set of
        frontier places, and dictionary with the results of measures on the
        given graph if requested.
    :rtype: <tuple>, <str>, <dict>
    """
    # Load transitions in the model
    # Transitions structure format:
    # {u'h00': [('Ax', 'n1', {u'label': u'h00[]'}),]
    parser_1 = MakeModelFromXmlFile(model_file)
    transitions_1 = get_transitions(parser_1)

    # Get all nodes
    all_places_1 = parser_1.handler.node_dict.keys()
    # Get the model
    model = parser_1.handler.model

    # Get all frontier places in the models
    # (places that are never in output position in all transitions)
    # EDIT: why we use all_places from the model instead of
    # (input_places - output_places) to get frontier places ?
    # Because some nodes are only in conditions and not in transitions.
    # If we don't do that, these nodes are missing when we compute
    # valid paths from conditions.
    front_places = get_frontier_places(transitions_1, all_places_1)
    if LOGGER.getEffectiveLevel() == DEBUG:
        LOGGER.debug("Frontier places: %s", sorted(front_places))

    # Build graphs & get networkx object
    # We give all events in the model as a list of steps
    # So we simulate a cadbiom solution (with all events in the model).
    res_1 = build_graph(front_places, [transitions_1.keys()], transitions_1)
    G = res_1[0]

    if not graph_data:
        return res_1, front_places

    info = {
        "modelFile": model_file,
        "modelName": model.name,
        # Fix: the key used to be "events:" (stray colon inside the string),
        # which contradicted the documented output schema ('events').
        "events": len(transitions_1),  # One event can have multiple transitions
        "entities": len(all_places_1),  # places
        "boundaries": len(front_places),  # frontier places
        "transitions": len(model.transition_list),
    }
    # Complete info in place with graph measures (and centralities on demand)
    get_solutions_graph_data(G, info, centralities)

    LOGGER.info(info)
    return res_1, front_places, info
def low_model_info(
    model_file, all_entities=False, boundaries=False, genes=False, smallmolecules=False
):
    """Low level function for :meth:`~cadbiom_cmd.models.model_info`.

    Get JSON data with information about the model and its entities.

    .. TODO::
        - add dump of transitions (option)
        - See get_transitions remark about its deprecation for the current use case
        - Dump roles of boundaries, computed here or in ChartModel...
          Already implemented for queries_2_common_graph and for pie charts.

    .. seealso:: Output format of: :meth:`tools.solutions.convert_solutions_to_json`

    :param model_file: File for the model.
    :key all_entities: If True, data for all places of the model are returned
        (optional).
    :key boundaries: If True, only data for the frontier places of the model
        are returned (optional).
    :key genes: If True, only data for the genes of the model are returned
        (optional).
    :key smallmolecules: If True, only data for the smallmolecules of the model
        are returned (optional).
    :type model_file: <str>
    :type all_entities: <boolean>
    :type boundaries: <boolean>
    :type genes: <boolean>
    :type smallmolecules: <boolean>
    :return: Dictionary with informations about the model and the queried nodes.
        The 'entitiesData' key is present only when at least one filter
        (all_entities/boundaries/genes/smallmolecules) is set.

        :Example:

        .. code-block:: javascript

            {
                'modelFile': 'string',
                'modelName': 'string',
                'events': int,
                'entities': int,
                'boundaries': int,
                'transitions': int,
                'entitiesLocations': {'cellular_compartment_a': int, ...},
                'entitiesTypes': {'biological_type_a': int, ...},
                'entitiesData': [{
                    'cadbiomName': 'string',
                    'immediateSuccessors': ['string', ...],
                    'uri': 'string',
                    'entityType': 'string',
                    'entityRef': 'string',
                    'location': 'string',
                    'names': ['string', ...],
                    'xrefs': {'external_database_a': ['string', ...], ...}
                }, ...],
            }

    :rtype: <dict>
    """
    # Load transitions in the model
    # Transitions structure format:
    # {u'h00': [('Ax', 'n1', {u'label': u'h00[]'}),]
    parser_1 = MakeModelFromXmlFile(model_file)
    transitions_1 = get_transitions(parser_1)

    # Get all nodes
    all_places_1 = parser_1.handler.node_dict.keys()
    # Get the model
    model = parser_1.handler.model

    # Get all frontier places in the models
    # (places that are never in output position in all transitions)
    # EDIT: why we use all_places from the model instead of
    # (input_places - output_places) to get frontier places ?
    # Because some nodes are only in conditions and not in transitions.
    # If we don't do that, these nodes are missing when we compute
    # valid paths from conditions.
    front_places = get_frontier_places(transitions_1, all_places_1)

    # Basic informations
    info = {
        "modelFile": model_file,
        "modelName": model.name,
        # Fix: the key used to be "events:" (stray colon inside the string),
        # which contradicted the documented output schema ('events').
        "events": len(transitions_1),  # One event can have multiple transitions
        "entities": len(all_places_1),  # places
        "boundaries": len(front_places),  # frontier places
        "transitions": len(model.transition_list),
    }

    # Complete the data with StaticAnalysis
    # Call custom Reporter instead of CompilReporter
    # from cadbiom_gui.gt_gui.utils.reporter import CompilReporter
    static_analyser = StaticAnalyzer(Reporter())
    static_analyser.build_from_chart_model(model)
    info['entitiesLocations'], info['entitiesTypes'] = \
        static_analyser.get_stats_entities_data()

    # There are multiple successors per origin...
    # Used to add this info to every nodes requested; see below
    places_successors = defaultdict(list)
    for trans in parser_1.model.transition_list:
        places_successors[trans.ori.name].append(trans.ext.name)

    # Filter places; when several filters are set, the last one wins
    # (historical behavior, kept for backward compatibility)
    if all_entities:
        info["entitiesData"] = get_places_data(all_places_1, model)
    if boundaries:
        info["entitiesData"] = get_places_data(front_places, model)
    if genes:
        g = (place_name for place_name in all_places_1 if "_gene" in place_name)
        info["entitiesData"] = get_places_data(g, model)
    if smallmolecules:
        # Filter on entityTypes
        info["entitiesData"] = [
            data
            for data in get_places_data(all_places_1, model)
            if data["entityType"] == "SmallMolecule"
        ]

    # Edit places and insert immediate successors... It is ugly but requested...
    # Another request that brings overhead for nothing...
    # Fix: use .get() so that calling this function without any filter flag
    # no longer raises a KeyError on the missing 'entitiesData' key.
    for place in info.get("entitiesData", []):
        place["immediateSuccessors"] = places_successors[place["cadbiomName"]]

    return info
def model_identifier_mapping(model_file, *args, **kwargs):
    """Entry point for the mapping of identifiers from external databases

    :param model_file: File for the model.
    :key external_file: File with 1 external identifier per line.
    :key external_identifiers: List of external identifiers to be mapped.
    :type model_file: <str>
    :type external_file: <str>
    :type external_identifiers: <list>
    """
    # The file takes precedence over the inline list of identifiers
    external_file = kwargs.get("external_file", None)
    if external_file:
        with open(external_file, "r") as f_d:
            # Strip both Unix and Windows line endings
            external_identifiers = {line.strip("\n").strip("\r") for line in f_d}
    else:
        external_identifiers = set(kwargs["external_identifiers"])

    mapping = get_model_identifier_mapping(model_file, external_identifiers)

    # Dump the mapping as a 2-column CSV file
    # NOTE: str(";") keeps the delimiter a byte string despite
    # unicode_literals (required by the Python 2 csv module)
    with open("mapping.csv", "w") as csvfile:
        writer = csv.writer(csvfile, delimiter=str(";"))
        # Header
        writer.writerow(["external identifiers", "cadbiom identifiers"])
        # Join multiple Cadbiom names with a |
        rows = (
            (external_id, "|".join(cadbiom_names))
            for external_id, cadbiom_names in mapping.iteritems()
        )
        writer.writerows(rows)
def model_graph(model_file, output_dir="./graphs/", centralities=False, **kwargs):
    """Get quick information and make a graph based on the model.

    :param model_file: File for the '.bcx' model.
    :param output_dir: Output directory.
    :param centralities: If True with ``--json``, compute centralities
        (degree, in_degree, out_degree, closeness, betweenness).
    :keyword graph: If True, make a GraphML file based on the graph maked
        from the model (optional).
    :keyword json: If True, make a JSON dump of results in output path(optional).
    :keyword json_graph: If True, make a JSON dump of the graph itself (optional).
    :type model_file: <str>
    :type output_dir: <str>
    :type centralities: <boolean>
    :type graph: <boolean>
    :type json: <boolean>
    """
    # Bind arguments to avoid overwriting previous imports
    make_json = kwargs["json"]
    make_graph = kwargs["graph"]
    make_json_graph = kwargs["json_graph"]

    # Centralities are only reported in the JSON summary; skip the costly
    # computation when that summary is not requested
    if not make_json:
        centralities = False

    res_1, front_places, model_graph_info = low_graph_info(
        model_file, graph_data=True, centralities=centralities
    )

    # JSON dump of the graph itself (networkx graph -> dictionary)
    if make_json_graph:
        json_data = get_json_graph(res_1[0])
        with open(output_dir + "graph.json", "w") as f_d:
            f_d.write(json.dumps(json_data, indent=2))

    # GraphML export; the model name is URL-quoted to get a safe filename
    if make_graph:
        quoted_name = urllib_quote(model_graph_info["modelName"], safe="")
        export_graph(output_dir, front_places, quoted_name, *res_1)

    # JSON dump of the measures computed on the graph
    if make_json:
        with open(output_dir + "graph_summary.json", 'w') as f_d:
            dump = json.dumps(model_graph_info, sort_keys=True, indent=2)
            f_d.write(dump + '\n')
def model_info(model_file, output_dir='./',
               all_entities=False, boundaries=False,
               genes=False, smallmolecules=False, default=True, **kwargs):
    """Get quick and full informations about the model structure and places.

    :param model_file: File for the '.bcx' model.
    :key output_dir: Output directory.
    :key all_entities: If True, data for all places of the model are returned
        (optional).
    :key boundaries: If True, only data for the frontier places of the model
        are returned (optional).
    :key genes: If True, only data for the genes of the model are returned
        (optional).
    :key smallmolecules: If True, only data for the smallmolecules of the model
        are returned (optional).
    :key default: Display quick description of the model
        (Number of places, transitions, entities types, entities locations).
    :key json: If True, make a JSON dump of results in output path(optional).
    :key csv: If True, make a csv dump of informations about filtered places.
    :type model_file: <str>
    :type output_dir: <str>
    :type all_entities: <boolean>
    :type boundaries: <boolean>
    :type genes: <boolean>
    :type smallmolecules: <boolean>
    :type default: <boolean>
    :type json: <boolean>
    :type csv: <boolean>
    """
    # Bind arguments to avoid overwriting previous imports
    make_json = kwargs["json"]
    make_csv = kwargs["csv"]

    # Fix: this used to test `not (make_json and make_csv)`, which forced the
    # default display (and its early return below) as soon as only ONE export
    # format was requested, silently skipping the requested export.
    # The default display is wanted only when NO export is requested.
    if not (make_json or make_csv):
        default = True

    def dump_places_to_csv(entities_data, output_filename):
        """Write informations about places in the model to a csv.

        One column per external database found in the xrefs of the places;
        multi-valued fields are joined with a pipe.
        """
        with open(output_filename, "w") as csvfile:
            # Get all database names
            database_names = {
                db_name
                for place in entities_data
                for db_name in place.get("xrefs", dict()).iterkeys()
            }
            # Write headers
            fieldnames = (
                "cadbiomName", "immediateSuccessors", "names", "uri",
                "entityType", "entityRef", "location"
            ) + tuple(database_names)
            writer = csv.DictWriter(
                csvfile,
                fieldnames=fieldnames,
                extrasaction="ignore",  # Ignore keys not found in fieldnames (xrefs)
            )
            writer.writeheader()

            for place in entities_data:
                # Since we modify places, we need to make a copy in memory
                temp_place = place.copy()
                # Join names with a pipe...
                # Handle escaped unicode characters in model
                # Ex: \u03b2-catenin => β-Catenin
                temp_place["names"] = "|".join(place.get("names", list())).encode(
                    "utf-8"
                )
                temp_place["immediateSuccessors"] = "|".join(
                    place["immediateSuccessors"]
                ).encode("utf-8")
                # Join xrefs ids with a pipe...
                for db_name, db_ids in place.get("xrefs", dict()).iteritems():
                    temp_place[db_name] = "|".join(db_ids).encode("utf-8")
                writer.writerow(temp_place)

    def get_output_filename(filetype="csv"):
        """Return the filename according to the given filters and filetype."""
        if all_entities:
            return "all_entities." + filetype
        if boundaries:
            return "boundaries." + filetype
        if genes:
            return "genes." + filetype
        if smallmolecules:
            return "smallmolecules." + filetype
        # Fix: fallback so callers never concatenate None into a filepath
        # when no filter flag is set
        return "model." + filetype

    if default:
        # Quick description only: no entity data is computed
        # Call custom Reporter instead of CompilReporter
        # from cadbiom_gui.gt_gui.utils.reporter import CompilReporter
        static_analyser = StaticAnalyzer(Reporter())
        static_analyser.build_from_chart_file(model_file)
        print(static_analyser.get_statistics())
        return

    # Renamed from `model_info` to avoid shadowing this very function
    info = low_model_info(
        model_file, all_entities, boundaries, genes, smallmolecules
    )

    # Export to csv file ('entitiesData' is absent when no filter is set)
    if make_csv:
        dump_places_to_csv(
            info.get('entitiesData', []), output_dir + get_output_filename()
        )

    # Export to json file
    if make_json:
        with open(output_dir + "model_summary_" + get_output_filename("json"), 'w') as f_d:
            # Handle escaped unicode characters in model
            # Ex: \u03b2-catenin => β-Catenin
            f_d.write(
                json.dumps(info, sort_keys=True, indent=2, ensure_ascii=False).encode('utf8')
            )