Source code for cadbiom_cmd.interaction_graph

# -*- coding: utf-8 -*-
# Copyright (C) 2017-2020  IRISA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# The original code contained here was initially developed by:
#
#     Pierre Vignet.
#     IRISA
#     Dyliss team
#     IRISA Campus de Beaulieu
#     35042 RENNES Cedex, FRANCE
"""
This module groups functions directly related to the design of an interaction
weighted graph based on the search of molecules of interest.

Entry point: :meth:`~cadbiom_cmd.interaction_graph.json_2_interaction_graph`.
"""
from __future__ import unicode_literals
from __future__ import print_function

# Standard imports
import time
import json
import os
import glob
import itertools as it
from collections import defaultdict, Counter
import networkx as nx

# Library imports
import cadbiom.commons as cm

LOGGER = cm.logger()


## Handle *.json files #########################################################






## Build the graph ### #########################################################

[docs]def filter_trajectories(trajectories, molecules_of_interest): """Get solutions and count frontier places related to the given molecules of interest. :param trajectories: A generator of tuples with tuple of frontier places as keys and set of places involved in transitions as values. .. code-block:: python (("Ax", "Bx"), {"n3", "Bx"}) :param molecules_of_interest: Iterable of molecules of interest. :type trajectories: <generator <tuple <tuple>, <set>>> :type molecules_of_interest: <tuple> :return: A tuple of all solutions related to the molecules of interest, and a dictionary of related frontier places and their occurences for each molecule of interest. .. code-block:: python # For molecules of interest "A" and "B" ((("frontier_1", "frontier_2", "frontier_3"),), {"A": { "frontier_1": 1, "frontier_2": 1, }, "B": { "frontier_3": 1, }, }) :rtype: <tuple <tuple <tuple <str>>>, <dict <str>: <Counter <str>: <int>>>> """ molecules_of_interest = set(molecules_of_interest) def entity_is_in_trajectory(sol, trajectory_places): """Search interactions between molecules of interest and entities involved in the given trajectory, and pre-build binary interactions. We assume that the molecules of interest **are** in the trajectories. We associate with each entity the trajectories containing them. :param trajectory_places: A trajectory: i.e a set of entities. :type trajectory_places: <set> :return: True if an entity of interest is found in the given trajectory. :rtype: <boolean> """ entity_found = False # Search each entity of interest among the places in the trajectory g = (searched_molec for searched_molec in molecules_of_interest if searched_molec in trajectory_places) for searched_molec in g: # Associate the entity of interest with the frontier places and # count their occurences. # The count is kept in order to weight the edges of the graph. binary_interactions[searched_molec] += Counter(sol) # trajectory_places entity_found = True return entity_found binary_interactions = defaultdict(Counter) # Almost one entity of interest is found in the trajectory # => we keep the solution filtered_macs = tuple( sol for sol, trajectory_places in trajectories if entity_is_in_trajectory(sol, trajectory_places) ) assert filtered_macs, "No molecule of interest was found." return filtered_macs, binary_interactions
[docs]def build_interactions(filtered_macs, binary_interactions): """Make binary interactions used by the graph as edges PS: genes and stimulis are frontier places. :param filtered_macs: All solutions related to the molecules of interest. .. code-block:: python (("frontier_1", "frontier_2", "frontier_3"),) :type filtered_macs: <tuple <tuple <str>>> :param binary_interactions: A dictionary of related frontier places. .. code-block:: python # For molecules of interest "A" and "B" {"A": { "frontier_1": 1, "frontier_2": 1, }, "B": { "frontier_3": 1, }, } :type binary_interactions: <dict <str>: <Counter <str>: <int>>> :return: Various Counters of binary interactions: * all_genes: All genes in all the solutions * all_stimuli: All stimulis in all the solutions * genes_interactions: Interactions between genes in the same solution * stimulis_interactions: Interactions between stimuli in the same solution * genes_stimuli_interactions: Interactions between genes and stimulis in the same solution * molecule_stimuli_interactions: Counter interactions between molecules of interest and frontier places that are not genes (stimuli) in trajectories (i.e.: ``(molecule, stimulus)``). :rtype: <set>, <set>, <Counter>, <Counter>, <Counter>, <Counter> """ genes_stimuli_interactions = Counter() genes_interactions = Counter() stimulis_interactions = Counter() # All genes in frontier places all_genes = set() # All other entities in frontier places all_stimuli = set() for frontier_places in filtered_macs: # Extract genes from the frontier places genes = { frontier_place for frontier_place in frontier_places if "_gene" in frontier_place } # Stimulis are the other places stimuli = set(frontier_places) - genes all_genes.update(genes) all_stimuli.update(stimuli) # For every solution composed of frontier places, # compute binary interactions between: # genes and stimulis genes_stimuli_interactions.update(it.product(genes, stimuli)) # genes two by two genes_interactions.update(it.combinations(genes, 2)) # stimulis two by two stimulis_interactions.update(it.combinations(stimuli, 2)) # print("genes/stimulis interactions:", genes_stimuli_interactions) # print("genes interactions:", genes_interactions) # print("stimulis interactions:", stimulis_interactions) LOGGER.debug("build_interactions:: all genes:\n%s", all_genes) LOGGER.debug("build_interactions:: all stimuli:\n%s", all_stimuli) ####### def make_molec_stimuli_interactions(molecule_of_interest, places_occurrences): """Make binary interactions between a molecule of interest and the stimuli in their solutions. We also add to each generated tuple, its number of occurences. We avoid all interactions that contain genes, and do not contain stimuli. .. warning:: Will not work if the molecule of interest is only in conditions and not in trajectories. In this case the molecule is a frontier place (an input of the model); so it seems useless to search this kind of entity. :param molecule_of_interest: A searched molecule (user input). :param places_occurrences: Counter with frontier placs as keys, and their occurences as values. :type molecule_of_interest: <str> :type places_occurrences: <Counter <str>: <int>> """ frontier_places = set(places_occurrences.keys()) # Remove interactions with a gene # => remove the link between the gene (frontier place) and the molecule of interest # => remove external stage of the graphe frontier_places -= all_genes # Remove interactions without stimuli (remove all entities that are not boundaries) # => Keep interaction between stimuli and molecule of interest # PS: frontier_places != all_stimuli because all_stimuli is a set # of all stimuli accross all solutions. # Here, frontier_places is a set of all solutions linked to the # given molecule_of_interest frontier_places &= all_stimuli # Get occurences for each binary interaction (molecule, frontier_place) return { molec_place: places_occurrences[molec_place[1]] for molec_place in it.product([molecule_of_interest], frontier_places) } g = ( make_molec_stimuli_interactions(*data) for data in binary_interactions.iteritems() ) # warning: marchera pas si la molec of interest est dans les conditions # et non dans les places des trajectoires... # => Comme si la molecule n'était pas trouvée molecule_stimuli_interactions = Counter() [molecule_stimuli_interactions.update(f_bin) for f_bin in g] return all_genes, all_stimuli, genes_interactions, \ stimulis_interactions, \ genes_stimuli_interactions, \ molecule_stimuli_interactions
[docs]def build_graph(output_dir, all_genes, all_stimuli, genes_interactions, stimulis_interactions, genes_stimuli_interactions, molecule_stimuli_interactions): """Make an interaction weighted graph based on the search of molecules of interest :Edges: * gene - gene: Two genes present simultaneously in a solution * stimulus - stimulus: Two stimuli present simultaneously in a solution * gene - stimulus: One gene and one stimulus present simultaneously in a solution (deprecated) * molecule of interest - stimulus: A molecule of interest in a trajectory related to a solution that contains a stimulus. :Legend of the edges: * gene - gene: red * stimulus - stimulus: blue (deprecated) * gene - stimulus: red * molecule of interest - stimulus: yellow :Legend of the nodes: * genes: red * stimuli: blue * molecules of interest: yellow :param output_dir: Output path. :param all_genes: All genes in all the solutions :param all_stimuli: All stimulis in all the solutions :param genes_interactions: Interactions between genes in the same solution :param stimulis_interactions: Interactions between stimuli in the same solution :param genes_stimuli_interactions: Interactions between genes and stimulis in the same solution :param molecule_stimuli_interactions: Counter interactions between molecules of interest and frontier places that are not genes (stimuli) in trajectories (i.e.: ``(molecule, stimulus)``). :type output_dir: <str> :type all_genes: <set> :type all_stimuli: <set> :type genes_interactions: <Counter> :type stimulis_interactions: <Counter> :type genes_stimuli_interactions: <Counter> :type molecule_stimuli_interactions: <Counter> """ LOGGER.info("Building graph...") # Count all nodes nodes_count = Counter(it.chain(*genes_interactions.keys())) nodes_count.update(it.chain(*stimulis_interactions.keys())) nodes_count.update(it.chain(*genes_stimuli_interactions.keys())) nodes_count.update(it.chain(*molecule_stimuli_interactions.keys())) # Make Graph ############################################################### def colormap(node): """Get color assigned to the given node depending on its group""" if node in all_genes: return "red" elif node in all_stimuli: return "blue" # Molecule of interest return "yellow" G = nx.DiGraph() # Add all nodes with their attributes: color & weight [G.add_node(node, weight=weight, color=colormap(node)) for node, weight in nodes_count.iteritems()] def build_edge_data(edges): """Build edge tuple for networkx API""" return [(node1, node2, weight) for (node1, node2), weight in edges.iteritems()] # Add all edges # PS Syntax: G.add_weighted_edges_from([(0,1,3.0),(1,2,7.5)],color='red') # Interactions between genes in the same solution G.add_weighted_edges_from(build_edge_data(genes_interactions), color="red") # Interactions between stimuli in the same solution # G.add_weighted_edges_from(build_edge_data(stimulis_interactions), color='blue') # Interactions between genes and stimulis in the same solution G.add_weighted_edges_from(build_edge_data(genes_stimuli_interactions), color="red") # Interactions between molecules of interest and frontier places # that are not genes in trajectories G.add_weighted_edges_from( build_edge_data(molecule_stimuli_interactions), color="yellow" ) # Write graph nx.write_graphml(G, output_dir + "interaction_graph.graphml")
[docs]def json_2_interaction_graph(output_dir, molecules_of_interest, path): """Entry point for json_2_interaction_graph Read decompiled solutions files (\*.json\* files produced by the directive ``queries_2_json``) and make a graph of the relationships between one or more molecules of interest, the genes and other frontier places/boundaries found among all the solutions. More information about the graph and its legend: :meth:`~cadbiom_cmd.interaction_graph.build_graph`. :param output_dir: Output path. :param molecules_of_interest: Iterable of molecules of interest. :param path: Filepath/directory of a JSON solution file. :type output_dir: <str> :type molecules_of_interest: <tuple> :type path: <str> """ chrono_start = time.time() # get_solutions_and_related_places: trajectories # filter_trajectories: filtered_macs, binary_interactions # build_interactions: data to unpack for build_graph build_graph( output_dir, *build_interactions( *filter_trajectories( get_solutions_and_related_places(path), molecules_of_interest ) ) ) LOGGER.info("Graph generated in %s", time.time() - chrono_start)