1
2
3 import sys, os, semanticSBML.annotate, libsbml, libSBAnnotation.database_new, cluster, fake_annotations, scipy, scipy.sparse
4
# ---------------------------------------------------------------------------
# Module-level configuration.
# ---------------------------------------------------------------------------

# Setting consumed by the imported `cluster` module (its meaning is defined there).
cluster.similarity_measure_version = 4

# Module-wide Meta_DB instance; the neighbourhood lookup code in this file
# queries relationships by abstract id through it.
database_new_instance = libSBAnnotation.database_new.Meta_DB()

add_qsms = False
use_qsms = False

# Experiment level. Each step above 1 switches on one more feature flag below.
# It is run through int() so that a numeric string is accepted as well.
result_number = 3

# Matrix files are kept in the ".semanticSBML" directory inside the user's home.
bimatrix_filename = os.path.join(os.path.expanduser('~'), '.semanticSBML', 'bimatrix.csv')
simmatrix_filename = os.path.join(os.path.expanduser('~'), '.semanticSBML', 'simmatrix.csv')

# Feature flags derived from the experiment level:
#   level > 1 -> use_libSBAnnotation
#   level > 2 -> use_similarity_in_euklidian
#   level > 3 -> use_semantic_density
use_libSBAnnotation = int(result_number) > 1
use_similarity_in_euklidian = int(result_number) > 2
use_semantic_density = int(result_number) > 3

# Mapping annotation -> count, read when semantic density weighting is enabled.
# It starts empty here; whatever fills it is outside this section.
# (A `global` statement is not needed at module scope, so none is used.)
annotation_count = {}
25
30 self.primary_keys = dictionary.keys()
31 self.secondary_keys = set([])
32 [[self.secondary_keys.add(key2) for key2 in dictionary[key1]] for key1 in self.primary_keys]
33 self.secondary_keys = list(self.secondary_keys)
34 self.secondary_keys.sort()
35 self.matrix = []
36 self.matrix.append( [""]+self.secondary_keys )
37 for key1 in self.primary_keys:
38 line = [key1]
39 for key2 in self.secondary_keys:
40 val = '0.'
41 if key2 in dictionary[key1]:
42 val = str(dictionary[key1][key2])
43 line.append(val)
44 self.matrix.append(line)
52 self.matrix = []
53 f=open(filename,'r')
54 for line in f:
55 if not ',' in line:
56 continue
57 self.matrix.append( line.strip('\n').split(',') )
58 self.primary_keys = [line[0] for line in self.matrix[1:]]
59 self.secondary_keys = self.matrix[0][1:]
60
100
101
104 self.list_of_models = {}
105 self.description_of_annotations = {}
106 self.model_to_annotations = {}
107 filenames = os.listdir(path)
108 filenames.sort()
109 for filename in filenames:
110 if filename.startswith('BIOMD') and filename.endswith('.xml'):
111 libsbml_document = libsbml.readSBML(path+os.sep+filename)
112 model_name = str(int(filename[5:-4]))+' '+libsbml_document.getModel().getName()
113 aio = AnnotationInformationObject(libsbml_document, model_name)
114 self.model_to_annotations[aio.model_name] = aio.annotations
115 for local_annotation_id in aio.description_of_annotations:
116 if local_annotation_id not in self.description_of_annotations:
117 self.description_of_annotations[local_annotation_id] = aio.description_of_annotations[local_annotation_id]
118 _mycsv = mycsv()
119 _mycsv.load_from_dictionary(self.model_to_annotations)
120 _mycsv.write_to_file(bimatrix_filename)
121
122
def __init__(self, max_distance, csv=None):
    """Build the annotation-by-annotation similarity matrix.

    max_distance -- stored as ``self.max_distance``; the neighbourhood
                    helpers of this object read it as their step limit.
    csv          -- optional table object exposing ``secondary_keys``;
                    when None a ``mycsv`` is loaded from
                    ``bimatrix_filename`` instead.

    After construction the object provides:
      self.annotation_keys       -- column keys of the table (strings)
      self.known_annotation_keys -- the keys without ':' converted to int
      self.neighbourhood_cache   -- known key -> result of
                                    ``generate_neighbourhood``
      self.similarity_matrix     -- ``scipy.sparse.csc_matrix`` with one
                                    row/column per annotation key
    """
    # The module-level `annotation_count` is only read here, so no
    # `global` declaration is required.
    log_total_annotation_count = scipy.log(sum(annotation_count.values()) + 1)
    self.max_distance = max_distance
    if csv is None:
        self.csv = mycsv()
        self.csv.load_from_file(bimatrix_filename)
    else:
        self.csv = csv
    self.annotation_keys = self.csv.secondary_keys
    self.known_annotation_keys = [int(x) for x in self.annotation_keys if ":" not in x]
    self.direct_neighbourhood_cache = {}
    self.neighbourhood_cache = {}

    # Pre-compute the neighbourhood of every known annotation while
    # reporting progress.
    done = 0
    total = len(self.known_annotation_keys)
    libSBAnnotation.stderr.downloadProgress(total, done, True, False, "local neighbourhoods")
    for known_annotation in self.known_annotation_keys:
        self.neighbourhood_cache[known_annotation] = self.generate_neighbourhood(known_annotation)
        done += 1
        libSBAnnotation.stderr.downloadProgress(total, done, False, False)
    # NOTE(review): the source indentation was lost; this closing call is
    # assumed to run once after the loop -- confirm against the original.
    libSBAnnotation.stderr.downloadProgress(total, done, False, True)

    # Parse each key once instead of once per matrix cell: keys that
    # contain ':' stay strings, all others become ints.
    parsed_keys = [key if ':' in key else int(key) for key in self.annotation_keys]

    # The density factor depends on a single annotation only, so it is
    # computed per key rather than per pair.
    densities = None
    if use_semantic_density:
        unseen_density = 1. - (1. / log_total_annotation_count)
        densities = [
            1. - (scipy.log(annotation_count[key]) / log_total_annotation_count)
            if key in annotation_count else unseen_density
            for key in parsed_keys
        ]

    self.similarity_matrix = []
    self.aid_to_SingleAnnotation = {}
    for index1, anno1 in enumerate(parsed_keys):
        anno1_is_known = isinstance(anno1, int)
        similarity_line = []
        for index2, anno2 in enumerate(parsed_keys):
            if index1 == index2:
                this_similarity = 1.
            elif (anno1_is_known and isinstance(anno2, int)
                    and anno2 in self.neighbourhood_cache[anno1]):
                # Only pairs of known annotations that are neighbours get
                # a computed similarity.
                sa1 = self.abstract_id_to_single_annotation(anno1)
                sa2 = self.abstract_id_to_single_annotation(anno2)
                this_similarity = sa1.similarity(sa2)
            else:
                this_similarity = 0.
            # NOTE(review): assumed to apply to every cell (loop-body
            # level), not only to the zero-similarity branch -- confirm.
            if densities is not None:
                this_similarity = this_similarity * densities[index1] * densities[index2]
            similarity_line.append(this_similarity)
        self.similarity_matrix.append(similarity_line)
    self.similarity_matrix = scipy.sparse.csc_matrix(self.similarity_matrix)
172 if abstract_id not in self.aid_to_SingleAnnotation:
173 db = "libSBAnnotation"
174 id = abstract_id
175 qualifier = 'is'
176 self.aid_to_SingleAnnotation[abstract_id] = cluster.SingleAnnotation( fake_annotations.Annotation( db, id, qualifier ) )
177 self.aid_to_SingleAnnotation[abstract_id].max_steps = self.max_distance
178 self.aid_to_SingleAnnotation[abstract_id].construct_aid_distances()
179 return self.aid_to_SingleAnnotation[abstract_id]
181 neighbourhood = [abstract_id]
182 queue = []
183 nextqueue = [abstract_id]
184 for step in range(self.max_distance):
185 queue = nextqueue
186 nextqueue = []
187 for aid in queue:
188 neighbours = self.generate_direct_neighbourhood(aid)
189 for neighbour in neighbours:
190 if neighbour not in neighbourhood:
191 nextqueue.append(neighbour)
192 neighbourhood.append(neighbour)
193 return neighbourhood
195 if abstract_id in self.direct_neighbourhood_cache:
196 return self.direct_neighbourhood_cache[abstract_id]
197 relation_object = database_new_instance.database_instance.__getRelationshipsByAbstractId__(abstract_id)
198 related_abstract_ids = relation_object.getRelatedAbstractIds()
199 self.direct_neighbourhood_cache[abstract_id] = related_abstract_ids
200 return related_abstract_ids
206
209 self.similarity_matrix = []
210 self.csv = mycsv()
211 self.csv.load_from_file(bimatrix_filename)
212 self.annotation_keys = self.csv.secondary_keys
213 self.known_annotation_keys = [int(x) for x in self.annotation_keys if not ":" in x]
219
def __init__(self, compare_annotations):
    """Convert every data row of the annotation table into a column vector.

    compare_annotations -- object exposing ``csv.matrix`` (a list of rows
                           whose first row is the header and whose first
                           cell per row is the model name) and
                           ``similarity_matrix``.

    Fills ``self.model_to_vector`` (model name -> transposed
    ``scipy.matrix`` of the row's float values) and keeps a reference to
    the similarity matrix of ``compare_annotations``.
    """
    self.compare_annotations = compare_annotations
    vectors = {}
    self.model_to_vector = vectors
    # Row 0 holds the column headers, so only the rows after it are data.
    for row in self.compare_annotations.csv.matrix[1:]:
        name, cells = row[0], row[1:]
        as_floats = [float(cell) for cell in cells]
        vectors[name] = scipy.matrix(as_floats).T
    self.similarity_matrix = self.compare_annotations.similarity_matrix
231 len_vector1 = scipy.sqrt( (vector1.transpose()*vector1)[0,0] )
232 len_vector2 = scipy.sqrt( (vector2.transpose()*vector2)[0,0] )
233 if len_vector1 == 0 or len_vector2 == 0:
234 similarity = 0.
235 elif not use_similarity_in_euklidian:
236 similarity = (vector1.transpose()*vector2)[0,0] / len_vector1 / len_vector2
237 else:
238
239 len_vector1 = scipy.sqrt( (vector1.transpose()*self.similarity_matrix*vector1)[0,0] )
240 len_vector2 = scipy.sqrt( (vector2.transpose()*self.similarity_matrix*vector2)[0,0] )
241 similarity = (vector1.transpose()*self.similarity_matrix*vector2)[0,0] / len_vector1 / len_vector2
242 return similarity
244
245 f = open(filename, 'w')
246 model_names = self.compare_annotations.csv.primary_keys
247 for index1 in range(len(model_names)):
248 print index1
249 for index2 in range(index1+1, len(model_names)):
250 sim = self.similarity(self.model_to_vector[model_names[index1]],
251 self.model_to_vector[model_names[index2]])
252 if sim > 0.:
253 bm1 = "BM"+model_names[index1].split()[0]
254 bm2 = "BM"+model_names[index2].split()[0]
255 f.write(bm1+'\t(sim)\t'+bm2+'\t=\t'+str(sim)+'\n')
256 f.close()
257
260 self.max_steps = 1
261 self.empty = True
325 dd = {}
326 model_names = self.ed.compare_annotations.csv.primary_keys
327 for model_name1 in model_names:
328 model_number1 = int(model_name1.split()[0])
329 dd[model_number1] = {}
330 for model_name2 in model_names:
331 model_number2 = int(model_name2.split()[0])
332 sim = self.ed.similarity(self.ed.model_to_vector[model_name1],
333 self.ed.model_to_vector[model_name2])
334 dd[model_number1][model_number2] = sim
335 return dd
336
337
if __name__ == "__main__":
    import datetime  # kept from the original; not referenced in the visible code

    # The single command-line argument is handed to AnnotationsInModels and
    # to DatabaseAnnotation.load_from_path below.
    # NOTE(review): it looks like a directory of BIOMD*.xml files -- confirm.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <path>")
    models_path = sys.argv[1]

    # NOTE(review): AnnotationsInModels appears to be constructed for its side
    # effect (writing the matrix file that CompareAnnotations loads) -- confirm.
    aim = AnnotationsInModels(models_path)
    ca = CompareAnnotations(1)
    ed = EuklideanDistance(ca)

    da = DatabaseAnnotation()
    da.load_from_path(models_path)

    # result_number is an int at module level, so it must be converted
    # before being concatenated into the output file name.
    da.compute_matrix('results/ce' + str(result_number) + '.attribute')
    # Parenthesised single-argument form prints the same value under the
    # Python 2 print statement and the Python 3 print function.
    print(da.compare_to_file("/home/schulzma/Desktop/cancer_relevant_receptors.xml"))
352
353