Source code for cadbiom.models.guard_transitions.translators.chart_xml_pid

## Filename    : chart_xml_pid.py
## Author(s)   : Geoffroy Andrieux
## Created     : 02/2012
## Revision    :
## Source      :
##
## Copyright 2010 - 2020 IRISA/IRSET
##
## This library is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published
## by the Free Software Foundation; either version 2.1 of the License, or
## any later version.
##
## This library is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY, WITHOUT EVEN THE IMPLIED WARRANTY OF
## MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.  The software and
## documentation provided here under is on an "as is" basis, and IRISA has
## no obligations to provide maintenance, support, updates, enhancements
## or modifications.
## In no event shall IRISA be liable to any party for direct, indirect,
## special, incidental or consequential damages, including lost profits,
## arising out of the use of this software and its documentation, even if
## IRISA have been advised of the possibility of such damage.  See
## the GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this library; if not, write to the Free Software Foundation,
## Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
##
## The original code contained here was initially developed by:
##
##     Geoffroy Andrieux.
##     IRISA/IRSET
##     Symbiose team
##     IRISA  Campus de Beaulieu
##     35042 RENNES Cedex, FRANCE
##
##
## Contributor(s): Michel Le Borgne
##
"""
Guarded transition interpretation of PID data
"""
from __future__ import print_function
import os

from lxml import etree

from cadbiom.models.guard_transitions.chart_model import ChartModel, ChartModelException


class MEvent(object):
    """
    Merged event: a single object standing for all the PID reactions
    that share the same inputs and the same outputs.
    """
    # Class-wide counter used by set_clock to generate distinct clock names.
    clock_count = 0

    def __init__(self, inputs, outputs, act_list, inhib_list, pmid, ai_inter):
        """
        Create a merged event.
        Similar reactions (same inputs and outputs) exist in PID; they are
        merged on the basis of the key computed here.

        @param inputs: list of strings - expected to be non empty
        @param outputs: list of strings - expected to be non empty
        @param act_list: list of strings (activators)
        @param inhib_list: list of strings (inhibitors)
        @param pmid: reference(s); concatenated with '+' when events are merged
        @param ai_inter: interpretation of activators and inhibitors
            0: activators are or-ed together, inhibitors are or-ed together
            1: activators are and-ed together, inhibitors are and-ed together
        @raise ChartModelException: if ai_inter is neither 0 nor 1
        """
        # Sorted copies: the key must not depend on the order of the
        # components, and the lists owned by the caller are left untouched.
        self.inputs = sorted(inputs)
        self.outputs = sorted(outputs)

        # A unique key corresponds to a combination of inputs - outputs.
        # Layout: in1!in2#!out1!out2 (every output is preceded by '!').
        outputs_part = ''.join('!' + out_name for out_name in self.outputs)
        self.key = '!'.join(self.inputs) + '#' + outputs_part

        # Compute the condition as a logical formula (or / and).
        if ai_inter == 0:
            act_logic = logical_or(act_list)
            inhib_logic = logical_or(inhib_list)
        elif ai_inter == 1:
            act_logic = logical_and(act_list)
            inhib_logic = logical_and(inhib_list)
        else:
            raise ChartModelException("Unknown activator and inhibitor option")

        condition_list = []
        if act_logic is not None:
            condition_list.append(act_logic)
        if inhib_logic is not None:
            # inhibitors block the event: their formula is negated
            condition_list.append('not(' + inhib_logic + ')')

        # None when there is neither activator nor inhibitor
        self.condition = logical_and(condition_list)
        self.pmid = pmid
        # The real clock is assigned from outside through set_clock.
        self.clock = '_$_'  # indeterminate clock

    def set_clock(self):
        """
        Create a clock identifier ('_h_<n>') and store it in the object.
        The class counter is incremented so that the next clock differs.
        """
        self.clock = '_h_%i' % MEvent.clock_count
        MEvent.clock_count += 1

    def merge(self, mev):
        """
        Merge another event into this one: the resulting condition is the
        "or" of the two conditions. References are concatenated without
        redundancy check.

        @param mev: MEvent with the same key as this one
        """
        if self.condition and mev.condition:
            left = "(" + self.condition + ")"
            right = "(" + mev.condition + ")"
            self.condition = left + 'or' + right
        elif mev.condition:
            # this event had no condition: take the other one as is
            self.condition = mev.condition
        self.pmid = self.pmid + mev.pmid

    def __str__(self):
        pieces = ['\nInputs: ']
        pieces.extend(in_name + ", " for in_name in self.inputs)
        pieces.append("\nOutput: ")
        pieces.extend(out_name + ", " for out_name in self.outputs)
        pieces.append("\nCondition: ")
        if self.condition:
            pieces.append(self.condition)
        pieces.append("\nClock: ")
        pieces.append(self.clock)
        pieces.append('\n')
        return ''.join(pieces)
class MakeModelFromPidFile(object):
    """
    Object for parsing a PID xml file and filling a ChartModel with the
    corresponding nodes and guarded transitions.
    """

    # Short hands appended to molecule names for the PID locations.
    # Locations that are not listed here are mangled as '_' + location.
    _LOCATION_CODES = {
        'transmembrane': '_transMb',
        'cytoplasm': '_cy',
        'cytosol': '_cy',
        'mitochondria': '_mi',
        'nucleus': '_nucl',
        'extracellular region': '_exCellRegion',
        'vesicle': '_v',
        'calcium store': '_calciumStore',
        'endosome': '_en',
        'endoplasmic reticulum': '_endoRetic',
        'Golgi apparatus': '_golgiAp',
        'lysosome': '_l',
        'extracellular matrix': '_exMatrix',
        'plasma membrane': '_plasmaMb',
        'integral to membrane': '_intToMb',
        'cell-cell junction': '_ccJct',
        'hemidesmosome': '_hd',
        'caveola': '_cav',
    }

    def __init__(self, xml_file, reporter, has_clock=True, ai_inter=0,
                 model=None, e_loc=None):
        """
        Do everything: parse the file, build the name dictionaries, merge
        the similar interactions and generate the transitions.

        @param xml_file: PID xml file, handed to etree.parse
        @param reporter: object with a display(str) method used for messages
        @param has_clock: if True each merged event gets a clock (see
            clock_trans), otherwise the data flow translation is used
            (see no_clock_trans)
        @param ai_inter: interpretation of activators and inhibitors,
            forwarded to MEvent
        @param model: model to fill; when none is given a ChartModel named
            after the file is created
        @param e_loc: list of locations counted as "extern" (statistics);
            defaults to an empty list
        """
        self.reporter = reporter
        self.has_clock = has_clock
        self.ai_interpretation = ai_inter
        # statistics
        self.node_count = 0
        self.transition_count = 0
        self.transcription_count = 0
        # A fresh list per instance: a literal [] default would be shared
        # by every instance created without e_loc.
        self.extern_loc = [] if e_loc is None else e_loc
        # for statistics on "extern" molecules
        self.extern_count = 0

        if not model:
            self.model = ChartModel(os.path.basename(xml_file))
        else:
            self.model = model

        self.top = self.model.get_root()
        self.dict_node = dict()        # node name -> node
        self.dict_transition = dict()  # 'input#output' -> transition
        # layout cursor, see coord_inc
        self.coord_x = 0.01
        self.coord_y = 0.01
        self.compt = 1
        self.dict_id = dict()    # id -> name for molecules
        self.dict_name = dict()  # name -> list of ids for molecules
        self.page = etree.parse(xml_file)
        self.int_cpt = 0
        self.mol_cpt = 0
        self.location_dict = dict()  # location -> number of occurrences

        self.make_dict()  # build dict_id and dict_name
        dict_mev = self.make_mev_dict()  # build a temporary dictionary

        print("\n\n\nNB INTERACTIONS:", self.int_cpt)
        print("\n\n\nNB MOLECULES:", self.mol_cpt)
        cpt = 0
        for loc_name, loc_count in self.location_dict.items():
            cpt += loc_count
            print(loc_name, '\t', loc_count)
        print(cpt)

        for mev in dict_mev.values():
            self.make_transition(mev)
        # for statistics
        self.clock_count = MEvent.clock_count
        # The model is currently not modified in comparison to the file
        self.model.modified = False

    def make_dict(self):
        """
        Build the two dictionaries dict_id and dict_name - Cadbiom basic
        names are created.
        Cadbiom names result from a mangling of the basic name ('PF' one
        if any) and various information such as localisation, activity,
        ptm ... see throught_component.
        """
        mol_list = self.page.find('Model/MoleculeList')
        for mol in mol_list:
            mid = mol.get('id')  # id nb in PID
            # NOTE(review): nesting reconstructed from a flattened listing;
            # molecules without molecule_type are assumed to be ignored
            # entirely - confirm against the reference implementation.
            if not mol.get('molecule_type'):
                continue
            mol_name_list = mol.findall('Name')  # list of synonyms in PID
            if not mol_name_list:
                # Without this guard the name of the previous molecule
                # (or an unbound variable) would be used.
                self.reporter.display('id error : ' + str(mid) + ' has no name')
                continue
            for name in mol_name_list:
                # last synonym wins if there is no preferred name type
                mol_name = name.get('value')
                if name.get('name_type') == "PF":  # preferred name
                    break

            if mid in self.dict_id and self.dict_id[mid] != mol_name:
                mess = 'id error : ' + str(mid)
                mess = mess + ' has two different names : '
                mess = mess + str(mol_name) + ' and ' + str(self.dict_id[mid])
                self.reporter.display(mess)
            self.dict_id[mid] = mol_name  # id --> name
            ids = self.dict_name.setdefault(mol_name, [])  # name --> ids
            if mid not in ids:
                ids.append(mid)

    def make_mev_dict(self):
        """
        Since several events with same inputs and outputs (and different
        conditions) may exist in PID, we merge them.

        @return: dict - MEvent key -> merged MEvent
        """
        seen_ids = set()
        int_list = self.page.find('Model/InteractionList')
        print(len(int_list))
        mev_dict = dict()
        for inter in int_list:
            source_id = inter.find('Source').get('id')
            # PID curated : 5, Biocarta : 2, Reactome : 7
            # NOTE(review): nesting reconstructed from a flattened listing;
            # the whole treatment is assumed to be restricted to source '5'.
            if source_id != '5':
                continue
            self.int_cpt += 1
            int_id = inter.get('id')
            if int_id in seen_ids:
                # redundancy may happen when PID files are merged
                continue
            seen_ids.add(int_id)
            (ili, oli, acl, inl, pmidl) = self.extract_int(inter)
            self.mol_cpt += len(ili) + len(oli) + len(acl) + len(inl)

            if len(ili) == 0 or len(oli) == 0:  # eliminate macros
                continue
            mev = MEvent(ili, oli, acl, inl, pmidl, self.ai_interpretation)
            # Only the lookup is guarded, so that a KeyError raised while
            # merging is not mistaken for a new event.
            try:
                known_mev = mev_dict[mev.key]
            except KeyError:
                # new merged event
                mev.set_clock()
                mev_dict[mev.key] = mev
            else:
                known_mev.merge(mev)
        return mev_dict

    def extract_int(self, inter):
        """
        Extract information from a PID interaction. A node is created in
        the model for each component met.

        @param inter: xml element of the interaction
        @return: tuple (inputs, outputs, activators, inhibitors, pmids),
            each one being a list
        """
        input_list = []
        output_list = []
        activator_list = []
        inhibitor_list = []
        pmid_list = []
        int_type = inter.get('interaction_type')

        # PMID part
        int_references = inter.find('ReferenceList')
        if int_references is not None:
            for ref in int_references:
                pmid_list.append(ref.get('pmid'))

        # Component part
        int_components = inter.find('InteractionComponentList')
        for comp in int_components:
            c_role = comp.get('role_type')
            c_name = self.throught_component(comp)  # do loc and ptm mangling
            self.make_new_node(c_name, 'simple')
            if c_role == 'input':
                input_list.append(c_name)
            elif c_role == 'output':
                output_list.append(c_name)
            elif c_role == 'agent':
                activator_list.append(c_name)
            else:
                # any other role is taken as an inhibitor
                inhibitor_list.append(c_name)

        # NOTE(review): assumed to run once per interaction, after the
        # component loop - confirm against the reference implementation.
        if int_type == 'transcription' and len(output_list) != 0:
            # a permanent gene node is added as input of the transcription
            gene_name = output_list[0] + '_gene'
            self.make_new_node(gene_name, 'perm')
            self.transcription_count += 1
            input_list.append(gene_name)
        return (input_list, output_list, activator_list,
                inhibitor_list, pmid_list)

    def throught_component(self, component):
        """
        Cadbiom names result from a mangling of the basic name ('PF' one
        if any) and various information such as localisation, activity,
        ptm ... This is done here.

        @param component: xml element of an interaction component
        @return: str - mangled name without unusable characters
        """
        activity_state = ''
        location = ''
        name = self.dict_id[component.get('molecule_idref')]

        for label in component.findall('Label'):
            lvalue = label.get('value')
            if label.get('label_type') == 'location':
                location = self.location_converter(lvalue)
            else:
                # any other label type is used as activity state
                activity_state = '_' + lvalue

        ptm_name = ''
        ptm_list = component.find('PTMExpression')
        if ptm_list is not None:
            ptm_name = ''.join(ptm_converter(ptm.get('modification'))
                               for ptm in ptm_list)

        final_name = make_name(name, activity_state, ptm_name, location)
        return make_new_name(final_name)

    def make_transition(self, mev):
        """
        Generate the guarded transitions of a merged event: one transition
        from each input to each different output.

        @param mev: the current merged event
        """
        # condition from activators and inhibitors (ai)
        ai_condition = mev.condition
        # building guarded transitions
        for in_name in mev.inputs:
            # transition is conditioned by presence of other inputs
            other_in_list = []
            for candidate in mev.inputs:
                if candidate != in_name and candidate not in other_in_list:
                    other_in_list.append(candidate)
            input_logic_and = logical_and(other_in_list)

            if input_logic_and and ai_condition:
                condition = ('(' + input_logic_and + ') and ('
                             + ai_condition + ')')
            elif input_logic_and:
                condition = input_logic_and
            else:
                condition = ai_condition

            # generate a guarded transition from input to each output of mev
            for out_name in mev.outputs:
                if in_name == out_name:
                    # TODO warn??
                    continue
                if self.has_clock:
                    self.clock_trans(mev, in_name, out_name, condition)
                else:
                    self.no_clock_trans(mev, in_name, out_name, condition)

    @staticmethod
    def _merge_conditions(previous, current, template):
        """
        Combine the condition already carried by a transition with the
        condition of a new event.

        @param previous: condition of the existing transition ('' or None
            when there is none)
        @param current: condition of the new event, may be None
        @param template: '%' template with two places (previous, current)
            used when both conditions exist and differ
        @return: the resulting condition
        """
        if current is None:
            return previous
        # An absent previous condition may be '' or None: nothing to combine.
        if not previous or previous == current:
            return current
        return template % (previous, current)

    def _new_transition(self, mev, input, output, condition):
        """
        Add to the model a transition input -> output guarded by condition
        and register it in dict_transition. Return the transition.
        """
        input_node = self.dict_node[input]
        output_node = self.dict_node[output]
        transition = self.top.add_transition(input_node, output_node)
        self.transition_count += 1
        if condition is not None:
            transition.set_condition(condition)
        transition.note = 'PMID ' + str(mev.pmid)
        self.dict_transition[input + '#' + output] = transition
        return transition

    def no_clock_trans(self, mev, input, output, condition):
        """
        Translation without clock - data flow interpretation.
        If a transition input -> output already exists its condition is
        or-ed with the new one, otherwise a new transition is created.

        @param mev: merged event (its pmid is appended to the note)
        @param input: name of the origin node
        @param output: name of the target node
        @param condition: condition of the transition, may be None
        """
        inout = input + '#' + output
        if inout not in self.dict_transition:
            self._new_transition(mev, input, output, condition)
            return
        # a transition already exists input -> output
        prev_transition = self.dict_transition[inout]
        new_condition = self._merge_conditions(prev_transition.condition,
                                               condition, '%s or (%s)')
        prev_transition.set_condition(new_condition)
        prev_transition.note += str(mev.pmid)

    def clock_trans(self, mev, input, output, condition):
        """
        Translation with clock introduction.
        If a transition input -> output already exists with the same clock,
        the conditions are or-ed; with a different clock the event of the
        transition becomes the "default" of the two conditioned clocks and
        the condition is emptied. Otherwise a new transition is created.

        @param mev: merged event (provides the clock and the pmid)
        @param input: name of the origin node
        @param output: name of the target node
        @param condition: condition of the transition, may be None
        """
        inout = input + '#' + output
        if inout not in self.dict_transition:
            transition = self._new_transition(mev, input, output, condition)
            transition.event = mev.clock
            return
        # a transition already exists input -> output
        prev_transition = self.dict_transition[inout]
        previous_condition = prev_transition.condition  # might be None
        previous_clock = prev_transition.event
        if mev.clock == previous_clock:
            new_condition = self._merge_conditions(previous_condition,
                                                   condition, '(%s)or (%s)')
            new_clock = mev.clock
        else:
            # conditions are moved into the clock expression
            new_condition = ''
            ncl1 = when_clock(previous_clock, previous_condition)
            ncl2 = when_clock(mev.clock, condition)
            new_clock = default_clock(ncl1, ncl2)
        prev_transition.event = new_clock
        prev_transition.condition = new_condition
        prev_transition.note += str(mev.pmid)

    def location_converter(self, location):
        """
        Convert PID location coding into the cadbiom one and update the
        statistics on locations.

        @param location: str - PID location
        @return: str - suffix to append to the molecule name
        """
        self.location_dict[location] = self.location_dict.get(location, 0) + 1
        # statistics
        if location in self.extern_loc:
            self.extern_count += 1
        # short hands for locations
        try:
            return self._LOCATION_CODES[location]
        except KeyError:
            return '_' + location

    def make_new_node(self, node_name, node_type):
        """
        Add a new node in the model, unless the name is empty or already
        registered.

        @param node_name: str - name of the node
        @param node_type: 'simple', 'perm', 'macro' or 'trap'; any other
            value is reported and no node is created
        """
        if node_name == '' or node_name in self.dict_node:
            return
        xloc = self.coord_x
        yloc = self.coord_y
        if node_type == 'simple':
            node = self.top.add_simple_node(node_name, xloc, yloc)
        elif node_type == 'perm':
            node = self.top.add_perm_node(node_name, xloc, yloc)
        elif node_type == 'macro':
            node = self.top.add_macro_subnode(node_name, xloc, yloc,
                                              0.20, 0.05)
        elif node_type == 'trap':
            node = self.top.add_trap_node(xloc, yloc, node_name)
        else:
            # There is no node to register: stop here instead of failing
            # on an unbound variable.
            print('node type error')
            return
        self.node_count += 1
        self.dict_node[node_name] = node
        self.coord_inc()

    def coord_inc(self):
        """
        Increment coordinates of nodes for a rectangular layout: a new row
        is started once the counter reaches 19.
        """
        if self.compt == 19:
            self.compt = 1
            self.coord_x = 0.01
            self.coord_y += 0.03
        else:
            self.compt += 1
            self.coord_x += 0.05
def logical_or(list):
    """
    Build the disjunction of a list of propositions.

    @param list: list of strings
    @return: None for an empty list, the single element for a list of
        length one, otherwise str - the elements joined with ' or ' and
        enclosed in parentheses
    """
    size = len(list)
    if size == 0:
        return None
    if size == 1:
        return list[0]
    return '(' + ' or '.join(list) + ')'
def when_clock(clo, cond):
    """
    Generate a "when" literal expression.

    @param clo: str - clock expression
    @param cond: str - condition; may be None or empty
    @return: str - '(clo) when (cond)', or clo alone when there is no
        condition
    """
    # An empty or None condition leaves the clock unconditioned; a second
    # length test after this guard could never fail.
    if not cond:
        return clo
    return '(' + clo + ') when (' + cond + ')'
def default_clock(cl1, cl2):
    """
    Generate a literal "default" clock expression from two clocks.

    @param cl1: str - first clock expression
    @param cl2: str - second clock expression
    @return: str - '(cl1) default (cl2)'
    """
    pieces = ['(', cl1, ') default (', cl2, ')']
    return ''.join(pieces)
def logical_and(lprop):
    """
    Build the conjunction of a list of propositions.

    @param lprop: list of strings
    @return: None for an empty list, the single element for a list of
        length one, otherwise str - the elements joined with ' and ' and
        enclosed in parentheses
    """
    size = len(lprop)
    if size == 0:
        return None
    if size == 1:
        return lprop[0]
    return '(' + ' and '.join(lprop) + ')'
def ptm_converter(ptm_term):
    """
    Convert a PID post translational modification term into the suffix
    used in Cadbiom names.

    @param ptm_term: str - PID modification term
    @return: str - short suffix for the known terms, '_' + ptm_term
        for any other term
    """
    short_hands = {
        'phosphorylation': '_p',
        'acetylation': '_ac',
        'methylation': '_meth',
        'sumoylation': '_sumo',
        'ubiquitination': '_ub',
    }
    if ptm_term in short_hands:
        return short_hands[ptm_term]
    return '_' + ptm_term
def make_name(name, act, ptm, loc):
    """
    Generate a name: the basic name followed by the non empty suffixes,
    in the order activity, ptm, location.

    @param name: str - basic name
    @param act: str - activity suffix, may be empty
    @param ptm: str - ptm suffix, may be empty
    @param loc: str - location suffix, may be empty
    @return: str - mangled name
    """
    result = name
    for suffix in (act, ptm, loc):
        if len(suffix) != 0:
            result += suffix
    return result
def make_new_name(name):
    """
    Remove or replace the characters that cannot be used in a name.

    Spaces are dropped, '/' and '-' become '_', '+' becomes 'PLUS',
    '.' becomes 'POINT' and each parenthesis becomes '__'.

    @param name: str - name to clean
    @return: str - cleaned name
    """
    substitutes = {
        ' ': '',
        '/': '_',
        '-': '_',
        '+': 'PLUS',
        '.': 'POINT',
        '(': '__',
        ')': '__',
    }
    return ''.join(substitutes.get(char, char) for char in name)