Package libModelSimilarity :: Module calc_bimatrix
[hide private]
[frames | no frames]

Source Code for Module libModelSimilarity.calc_bimatrix

  1  #!/usr/bin/env python 
  2   
  3  import sys, os, semanticSBML.annotate, libsbml, libSBAnnotation.database_new, cluster, fake_annotations, scipy, scipy.sparse 
  4   
  5  cluster.similarity_measure_version = 4 
  6  database_new_instance = libSBAnnotation.database_new.Meta_DB() 
  7  add_qsms = False 
  8  use_qsms = False 
  9  result_number = 3#sys.argv[2]#'3' 
 10  bimatrix_filename  = os.path.join( os.path.expanduser('~'), '.semanticSBML', 'bimatrix.csv' ) 
 11  simmatrix_filename = os.path.join( os.path.expanduser('~'), '.semanticSBML', 'simmatrix.csv' ) 
 12   
 13  use_libSBAnnotation = False 
 14  use_similarity_in_euklidian = False 
 15  use_semantic_density = False 
 16  if int(result_number) > 1: 
 17      use_libSBAnnotation = True 
 18  if int(result_number) > 2: 
 19      use_similarity_in_euklidian = True 
 20  if int(result_number) > 3: 
 21      use_semantic_density = True 
 22   
 23  global annotation_count 
 24  annotation_count = {} 
 25   
class mycsv:
    """Minimal comma-separated matrix with a header row and a key column.

    ``matrix`` is a list of rows of strings. ``matrix[0]`` is
    ``[""] + secondary_keys``; every later row is ``[primary_key, value, ...]``.
    Values are neither quoted nor escaped, so keys containing a comma or a
    newline do not survive a write/read round trip.
    """

    def __init__(self):
        self.matrix = []
        # Row and column labels; set by the load_* methods.
        self.primary_keys = []
        self.secondary_keys = []

    def load_from_dictionary(self, dictionary):
        """Fill the matrix from a ``{primary_key: {secondary_key: value}}`` dict.

        Columns are the sorted union of all inner keys. A cell missing from
        an inner dict is written as the string ``'0.'``; present values are
        converted with ``str``.
        """
        # A real list (not a dict view) so callers can index it.
        self.primary_keys = list(dictionary.keys())
        column_keys = set()
        for key1 in self.primary_keys:
            column_keys.update(dictionary[key1])
        self.secondary_keys = sorted(column_keys)
        self.matrix = [[""] + self.secondary_keys]
        for key1 in self.primary_keys:
            row = dictionary[key1]
            line = [key1]
            for key2 in self.secondary_keys:
                line.append(str(row[key2]) if key2 in row else '0.')
            self.matrix.append(line)

    def write_to_file(self, filename):
        """Write the matrix to ``filename``, one comma-joined row per line."""
        # Report rows x columns without assuming that a data row exists.
        row_count = max(len(self.matrix) - 1, 0)
        if len(self.matrix) > 1:
            column_count = len(self.matrix[1]) - 1
        elif self.matrix:
            column_count = len(self.matrix[0]) - 1
        else:
            column_count = 0
        print('Writing a matrix of size %d x %d' % (row_count, column_count))
        # The with-block closes the file even if a row fails to serialise.
        with open(filename, 'w') as f:
            for line in self.matrix:
                f.write(','.join(line) + '\n')

    def load_from_file(self, filename):
        """Read a matrix written by ``write_to_file``.

        Lines without a comma are skipped. An empty (or comma-free) file
        yields an empty matrix and empty key lists.
        """
        self.matrix = []
        with open(filename, 'r') as f:
            for line in f:
                if ',' not in line:
                    continue
                self.matrix.append(line.strip('\n').split(','))
        self.primary_keys = [line[0] for line in self.matrix[1:]]
        self.secondary_keys = self.matrix[0][1:] if self.matrix else []
60
class AnnotationInformationObject:
    """Annotation scores of one model's compartments, species and reactions.

    After construction:
      model_name                 -- the name passed in
      annotations                -- {annotation id: score}
      description_of_annotations -- {annotation id: "db:id" string}
    """

    def __init__(self, libsbml_document_or_model_annotation, model_name):
        """Collect the annotations of a model.

        libsbml_document_or_model_annotation -- either an object whose type
            name contains "SBMLDocument" (it is wrapped in
            semanticSBML.annotate.ModelElementsAnnotations) or an object that
            already offers getElementAnnotations().
        model_name -- stored unchanged as self.model_name.
        """
        source = libsbml_document_or_model_annotation
        if "SBMLDocument" in str(type(source)):
            mea = semanticSBML.annotate.ModelElementsAnnotations(source.getModel(), True)
        else:
            mea = source
        self.model_name = model_name
        self.annotations = {}
        self.description_of_annotations = {}
        for ea in mea.getElementAnnotations():
            # Only these three element kinds contribute annotations.
            if ea.libsbml_element.getElementName() not in ('compartment', 'species', 'reaction'):
                continue
            for annotation in ea.getAnnotations():
                abstract_id = database_new_instance.database_instance.__getAbstractIdByIdDict__(
                    {annotation.db: annotation.id})
                description = annotation.db + ':' + annotation.id
                # Use the database's abstract id when available and enabled,
                # otherwise fall back to the plain "db:id" string.
                if abstract_id != None and use_libSBAnnotation:
                    key = str(abstract_id)
                else:
                    key = description
                qualifier_score = (cluster.qualifier_similarity_factor["is"][annotation.qualifier]
                                   if use_qsms else 1.)
                previous_score = self.annotations.get(key, 0.)
                if add_qsms:
                    new_score = previous_score + qualifier_score
                else:
                    new_score = max(previous_score, qualifier_score)
                self.description_of_annotations[key] = description
                self.annotations[key] = new_score
                if use_semantic_density:
                    # NOTE(review): the first sighting of an id yields a count
                    # of 2 (seed 1, then +1) -- confirm this is intended.
                    annotation_count[key] = annotation_count.get(key, 1) + 1
100 101
class AnnotationsInModels:
    """Annotations of every BIOMD*.xml model in a directory, written out as the bimatrix.

    After construction:
      model_to_annotations       -- {model name: {annotation id: score}}
      description_of_annotations -- {annotation id: "db:id" string}, first description seen wins
      list_of_models             -- created empty and not filled here
    """

    def __init__(self, path):
        """Read the models below ``path`` and write the bimatrix to ``bimatrix_filename``.

        Files are visited in sorted order. A model is named
        "<number> <model name>", where the number is the integer between the
        'BIOMD' prefix and the '.xml' suffix of the file name.
        """
        self.list_of_models = {}
        self.description_of_annotations = {}
        self.model_to_annotations = {}
        for filename in sorted(os.listdir(path)):
            if not (filename.startswith('BIOMD') and filename.endswith('.xml')):
                continue
            libsbml_document = libsbml.readSBML(path + os.sep + filename)
            model_number = int(filename[5:-4])
            model_name = str(model_number) + ' ' + libsbml_document.getModel().getName()
            aio = AnnotationInformationObject(libsbml_document, model_name)
            self.model_to_annotations[aio.model_name] = aio.annotations
            for local_annotation_id, description in aio.description_of_annotations.items():
                # Keep the description recorded by the first model that used this id.
                self.description_of_annotations.setdefault(local_annotation_id, description)
        table = mycsv()
        table.load_from_dictionary(self.model_to_annotations)
        table.write_to_file(bimatrix_filename)
121 122
class CompareAnnotations:
    """Pairwise similarity matrix over the annotation columns of a bimatrix.

    After construction:
      csv                   -- the mycsv holding the bimatrix
      annotation_keys       -- column keys of the bimatrix (strings)
      known_annotation_keys -- the keys without ':' converted to int (abstract ids)
      neighbourhood_cache   -- {abstract id: list of ids reachable in max_distance steps}
      similarity_matrix     -- scipy.sparse.csc_matrix, rows/columns ordered like annotation_keys
    """

    def __init__(self, max_distance, csv=None):
        """Collect neighbourhoods and compute the similarity matrix.

        max_distance -- number of relationship steps followed from each
            abstract id; also assigned to every SingleAnnotation as max_steps.
        csv -- an already loaded mycsv; when None the bimatrix is read from
            ``bimatrix_filename``.
        """
        log_total_annotation_count = scipy.log(sum(annotation_count.values()) + 1)
        self.max_distance = max_distance
        if csv is None:
            self.csv = mycsv()
            self.csv.load_from_file(bimatrix_filename)
        else:
            self.csv = csv
        self.annotation_keys = self.csv.secondary_keys
        self.known_annotation_keys = [int(x) for x in self.annotation_keys if ":" not in x]
        self.direct_neighbourhood_cache = {}
        self.neighbourhood_cache = {}

        # Progress reporting: one call to start, one per id, one to finish.
        # NOTE(review): the meaning of the two boolean flags is defined in
        # libSBAnnotation.stderr -- the values are kept exactly as before.
        total = len(self.known_annotation_keys)
        done = 0
        libSBAnnotation.stderr.downloadProgress(total, done, True, False, "local neighbourhoods")
        for known_annotation in self.known_annotation_keys:
            self.neighbourhood_cache[known_annotation] = self.generate_neighbourhood(known_annotation)
            done += 1
            libSBAnnotation.stderr.downloadProgress(total, done, False, False)
        libSBAnnotation.stderr.downloadProgress(total, done, False, True)

        # Convert each key once instead of once per matrix cell. None marks a
        # plain "db:id" annotation that has no abstract id.
        abstract_ids = [None if ':' in key else int(key) for key in self.annotation_keys]

        densities = None
        if use_semantic_density:
            # annotation_count is keyed by the *string* ids used as column
            # keys, so the lookup must use the string key. Looking up the
            # int-converted id never matched and silently gave every abstract
            # id the "unseen" density.
            unseen_density = 1. - (1. / log_total_annotation_count)
            densities = []
            for key in self.annotation_keys:
                if key in annotation_count:
                    densities.append(
                        1. - (scipy.log(annotation_count[key]) / log_total_annotation_count))
                else:
                    densities.append(unseen_density)

        self.similarity_matrix = []
        self.aid_to_SingleAnnotation = {}
        for index1, aid1 in enumerate(abstract_ids):
            similarity_line = []
            for index2, aid2 in enumerate(abstract_ids):
                if index1 == index2:
                    this_similarity = 1.
                elif (aid1 is not None and aid2 is not None
                        and aid2 in self.neighbourhood_cache[aid1]):
                    # Only ids inside each other's neighbourhood get a real
                    # similarity; all other pairs are 0.
                    sa1 = self.abstract_id_to_single_annotation(aid1)
                    sa2 = self.abstract_id_to_single_annotation(aid2)
                    this_similarity = sa1.similarity(sa2)
                else:
                    this_similarity = 0.
                if densities is not None:
                    this_similarity = this_similarity * densities[index1] * densities[index2]
                similarity_line.append(this_similarity)
            self.similarity_matrix.append(similarity_line)
        self.similarity_matrix = scipy.sparse.csc_matrix(self.similarity_matrix)

    def abstract_id_to_single_annotation(self, abstract_id):
        """Return the cached cluster.SingleAnnotation for ``abstract_id``, creating it on first use."""
        if abstract_id not in self.aid_to_SingleAnnotation:
            fake = fake_annotations.Annotation("libSBAnnotation", abstract_id, 'is')
            self.aid_to_SingleAnnotation[abstract_id] = cluster.SingleAnnotation(fake)
            self.aid_to_SingleAnnotation[abstract_id].max_steps = self.max_distance
            self.aid_to_SingleAnnotation[abstract_id].construct_aid_distances()
        return self.aid_to_SingleAnnotation[abstract_id]

    def generate_neighbourhood(self, abstract_id):
        """Return ``abstract_id`` plus every id reachable within ``max_distance`` steps.

        The result is a list in discovery order, starting with ``abstract_id``.
        """
        neighbourhood = [abstract_id]
        frontier = [abstract_id]
        for _step in range(self.max_distance):
            next_frontier = []
            for aid in frontier:
                for neighbour in self.generate_direct_neighbourhood(aid):
                    if neighbour not in neighbourhood:
                        next_frontier.append(neighbour)
                        neighbourhood.append(neighbour)
            frontier = next_frontier
        return neighbourhood

    def generate_direct_neighbourhood(self, abstract_id):
        """Return the ids directly related to ``abstract_id``, cached per id."""
        if abstract_id in self.direct_neighbourhood_cache:
            return self.direct_neighbourhood_cache[abstract_id]
        relation_object = database_new_instance.database_instance.__getRelationshipsByAbstractId__(abstract_id)
        related_abstract_ids = relation_object.getRelatedAbstractIds()
        self.direct_neighbourhood_cache[abstract_id] = related_abstract_ids
        return related_abstract_ids

    def dump_similarity_matrix(self, filename):
        """Pickle ``similarity_matrix`` to ``filename``."""
        import pickle
        # Pickle data is bytes: binary mode is required on Python 3 and avoids
        # newline translation elsewhere. The with-block closes the file even
        # if pickling fails.
        with open(filename, 'wb') as f:
            pickle.dump(self.similarity_matrix, f)
206
class FakeCompareAnnotations:
    """Stand-in for CompareAnnotations that loads a dumped similarity matrix instead of computing it.

    It exposes the attributes EuklideanDistance reads: ``csv`` and
    ``similarity_matrix`` (plus the key lists).
    """

    def __init__(self):
        """Load the bimatrix from ``bimatrix_filename``; the similarity matrix starts empty."""
        self.similarity_matrix = []
        self.csv = mycsv()
        self.csv.load_from_file(bimatrix_filename)
        self.annotation_keys = self.csv.secondary_keys
        self.known_annotation_keys = [int(x) for x in self.annotation_keys if ":" not in x]

    def load_similarity_matrix(self, filename):
        """Unpickle the similarity matrix stored in ``filename``."""
        import pickle
        # NOTE: unpickling can execute arbitrary code -- only load cache files
        # this program wrote itself.
        # Binary mode is required on Python 3 and avoids newline translation;
        # the with-block closes the file even if unpickling fails.
        with open(filename, 'rb') as f:
            self.similarity_matrix = pickle.load(f)
219
class EuklideanDistance:
    """Cosine-style similarity between the annotation vectors of models.

    After construction:
      compare_annotations -- the object supplying ``csv`` and ``similarity_matrix``
      model_to_vector     -- {model name: column vector built from its bimatrix row}
      similarity_matrix   -- taken from ``compare_annotations``
    """

    def __init__(self, compare_annotations):
        """Turn every data row of the bimatrix into a column vector."""
        self.compare_annotations = compare_annotations
        self.model_to_vector = {}
        for line in self.compare_annotations.csv.matrix[1:]:
            model_name = line[0]
            values = [float(x) for x in line[1:]]
            self.model_to_vector[model_name] = scipy.matrix(values).transpose()
        self.similarity_matrix = self.compare_annotations.similarity_matrix

    def similarity(self, vector1, vector2):
        """Return the normalised product of two column vectors.

        Returns 0. when either vector has Euclidean length 0. Otherwise the
        result is v1'v2 / (|v1| |v2|). When ``use_similarity_in_euklidian`` is
        set, every product is taken through ``similarity_matrix`` instead
        (v1' S v2, with lengths sqrt(v' S v)).
        """
        len_vector1 = scipy.sqrt((vector1.transpose() * vector1)[0, 0])
        len_vector2 = scipy.sqrt((vector2.transpose() * vector2)[0, 0])
        if len_vector1 == 0 or len_vector2 == 0:
            similarity = 0.
        elif not use_similarity_in_euklidian:
            similarity = (vector1.transpose() * vector2)[0, 0] / len_vector1 / len_vector2
        else:
            # Recomputing the lengths through the similarity matrix makes the
            # result the angle between the transformed vectors.
            len_vector1 = scipy.sqrt((vector1.transpose() * self.similarity_matrix * vector1)[0, 0])
            len_vector2 = scipy.sqrt((vector2.transpose() * self.similarity_matrix * vector2)[0, 0])
            similarity = ((vector1.transpose() * self.similarity_matrix * vector2)[0, 0]
                          / len_vector1 / len_vector2)
        return similarity

    def compute_matrix(self, filename):
        """Write all pairwise model similarities greater than 0 to ``filename``.

        Output is one line per unordered model pair, in Falko's graph format:
        ``BM<n1>\\t(sim)\\tBM<n2>\\t=\\t<similarity>``, where <n> is the first
        whitespace-separated token of the model name. The index of each row
        being processed is printed as progress output.
        """
        model_names = self.compare_annotations.csv.primary_keys
        # The with-block closes the file even if a similarity computation raises.
        with open(filename, 'w') as f:
            for index1 in range(len(model_names)):
                print(index1)
                for index2 in range(index1 + 1, len(model_names)):
                    sim = self.similarity(self.model_to_vector[model_names[index1]],
                                          self.model_to_vector[model_names[index2]])
                    if sim > 0.:
                        bm1 = "BM" + model_names[index1].split()[0]
                        bm2 = "BM" + model_names[index2].split()[0]
                        f.write(bm1 + '\t(sim)\t' + bm2 + '\t=\t' + str(sim) + '\n')
257
class DatabaseAnnotation:
    """Facade: build or load the bimatrix and similarity matrix, then compare models against them.

    Call ``load_from_path`` (recompute everything) or ``load_from_cache``
    (reuse the files written earlier) before any compare/compute method;
    those methods rely on ``self.fca`` and ``self.ed`` set there.
    """

    def __init__(self):
        # Neighbourhood radius handed to CompareAnnotations.
        self.max_steps = 1
        # True until load_from_cache has run.
        self.empty = True

    def isEmpty(self):
        """Return True while no data has been loaded."""
        return self.empty

    def compute_matrix(self, filename):
        """Write the pairwise model similarities to ``filename`` (see EuklideanDistance.compute_matrix)."""
        return self.ed.compute_matrix(filename)

    def load_from_path(self, path):
        """Rebuild the bimatrix and similarity matrix from the models below ``path``.

        The bimatrix is extended by one all-'0.' column for every
        neighbouring abstract id that is not yet a column, so that a
        similarity is later computed for it. Both files are rewritten and
        then loaded through ``load_from_cache``.
        """
        self.aim = AnnotationsInModels(path)
        self.ca = CompareAnnotations(self.max_steps)
        # Put the new neighbours into the bimatrix.
        for known_aid in self.ca.neighbourhood_cache:
            for neighbour_aid in self.ca.neighbourhood_cache[known_aid]:
                if neighbour_aid not in self.ca.known_annotation_keys:
                    self.ca.csv.secondary_keys.append(str(neighbour_aid))
                    self.ca.known_annotation_keys.append(neighbour_aid)
                    self.ca.csv.matrix[0].append(str(neighbour_aid))
                    for line in self.ca.csv.matrix[1:]:
                        line.append('0.')
        # Save the extended bimatrix, then recompute similarities over it.
        self.ca.csv.write_to_file(bimatrix_filename)
        self.ca = CompareAnnotations(self.max_steps)
        self.ca.dump_similarity_matrix(simmatrix_filename)
        self.load_from_cache()

    def load_from_cache(self):
        """Load the bimatrix and the pickled similarity matrix from their files."""
        self.fca = FakeCompareAnnotations()
        self.fca.load_similarity_matrix(simmatrix_filename)
        self.ed = EuklideanDistance(self.fca)
        self.empty = False

    def compare_to_file(self, libsbml_document_or_filename):
        """Compare an SBML file (given by name) or an already read document against all models.

        Returns {model number: similarity}.
        """
        # Accept byte and unicode strings alike as file names.
        if isinstance(libsbml_document_or_filename, (str, type(u""))):
            libsbml_document = libsbml.readSBML(libsbml_document_or_filename)
            filename = libsbml_document_or_filename
        else:
            libsbml_document = libsbml_document_or_filename
            filename = ""
        aio = AnnotationInformationObject(libsbml_document, filename)
        return self.compare_to_annotation_information_object(aio)

    def compare_model_annotation_object(self, model_annotation_object):
        """Compare an object offering getElementAnnotations() against all models.

        Returns {model number: similarity}.
        """
        aio = AnnotationInformationObject(model_annotation_object, "new file")
        return self.compare_to_annotation_information_object(aio)

    def compare_to_annotation_information_object(self, aio):
        """Compare the annotations in ``aio`` against every model of the bimatrix.

        Returns {model number: similarity}; the model number is the leading
        integer of the model's name. Annotations of ``aio`` that are not
        columns of the bimatrix are ignored.
        """
        vector = [aio.annotations.get(aid, 0.) for aid in self.fca.annotation_keys]
        vector = scipy.matrix(vector).transpose()
        model_names_to_sim = {}
        for model_name in self.fca.csv.primary_keys:
            sim = self.ed.similarity(self.ed.model_to_vector[model_name], vector)
            model_names_to_sim[int(model_name.split()[0])] = sim
        return model_names_to_sim

    def write_dict_dict_file(self, filename):
        """Pickle the result of ``biomodels_similarity`` to ``filename``."""
        import pickle
        # Binary mode is required on Python 3 and avoids newline translation;
        # the with-block closes the file even if the computation raises.
        with open(filename, 'wb') as f:
            pickle.dump(self.biomodels_similarity(), f)

    def biomodels_similarity(self):
        """Return {model number: {model number: similarity}} for all ordered model pairs."""
        dd = {}
        model_names = self.ed.compare_annotations.csv.primary_keys
        for model_name1 in model_names:
            model_number1 = int(model_name1.split()[0])
            dd[model_number1] = {}
            for model_name2 in model_names:
                model_number2 = int(model_name2.split()[0])
                sim = self.ed.similarity(self.ed.model_to_vector[model_name1],
                                         self.ed.model_to_vector[model_name2])
                dd[model_number1][model_number2] = sim
        return dd
if __name__ == "__main__":
    # Usage: calc_bimatrix.py <directory containing BIOMD*.xml models>

    # Build the bimatrix and the similarity data directly once.
    aim = AnnotationsInModels(sys.argv[1])
    ca = CompareAnnotations(1)
    ed = EuklideanDistance(ca)

    # Full pipeline: rebuild bimatrix and similarity matrix, then load them.
    # When both cache files already exist, da.load_from_cache() can be used
    # instead of load_from_path.
    da = DatabaseAnnotation()
    da.load_from_path(sys.argv[1])

    # result_number is an int, so it must be converted before concatenation.
    da.compute_matrix('results/ce' + str(result_number) + '.attribute')
    print(da.compare_to_file("/home/schulzma/Desktop/cancer_relevant_receptors.xml"))