1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
20 """ texte passé en paramètre des fonctions """
22 """ Calcule des dictionnaires des positions et des couts fixes
23
24 pre: isinstance(L,list)
25 len(L)>0
26 #post: forall([texte[x[0]:x[1]] in __return__[0].keys() for x in L])
27 """
28 dicoPos = {}
29 coutFixe = {}
30 cumul=0
31 for i in xrange(len(L)):
32
33
34 chaine, longueur = L[i]
35 try:
36 dicoPos[chaine].append(i)
37 except KeyError:
38 dicoPos[chaine] = [i]
39 coutFixe[i]=cumul
40 cumul+=longueur
41 return dicoPos,coutFixe
42
44 L1 = [] ; L2 = [] ; Lcf1 = [] ; Lcf2 = [] ; LH1 = [] ; LH2 = []
45 for bloc in Lv1:
46 cle = hash(self.texte[bloc[0]:bloc[1]])
47 L1.append(cle)
48 Lcf1.append((cle,bloc[1]-bloc[0]))
49 LH1.append((cle, bloc[0], bloc[1]))
50 print LH1
51 logging.debug('init L1 done')
52 for bloc in Lv2:
53 cle = hash(self.texte[bloc[0]:bloc[1]])
54 L2.append(cle)
55 Lcf2.append((cle,bloc[1]-bloc[0]))
56 LH2.append((cle, bloc[0], bloc[1]))
57 logging.debug('init L2 done')
58
59 dicoPos = {}
60 coutFixe = {}
61 dicoPos[1], coutFixe[1] = self.calcPosCoutFixe(Lcf1,self.texte)
62 dicoPos[2], coutFixe[2] = self.calcPosCoutFixe(Lcf2,self.texte)
63 for cle in dicoPos[1]: assert cle in dicoPos[2]
64 for cle in dicoPos[2]: assert cle in dicoPos[1]
65
66 diffSym = self.preTraitDiffSym(LH1,LH2,self.texte,dicoPos)
67 best, closedSet = self.deplacements_pond_Astar(L1,L2,self.texte,diffSym,dicoPos,coutFixe)
68 dicoBest1={}
69 dicoBest2={}
70 while best != (-1,-1):
71 dicoBest1[best[0]]=1
72 dicoBest2[best[1]]=1
73 best=closedSet[best][1]
74
75 LResDep=[]
76 LResBC=[]
77 for i in xrange(len(L1)):
78 if not dicoBest1.has_key(i):LResDep.append(L1[i])
79 else:LResBC.append(L1[i])
80 for i in xrange(len(L2)):
81 if not dicoBest2.has_key(i):LResDep.append(L2[i])
82 else:LResBC.append(L2[i])
83
84 del dicoPos, coutFixe, dicoBest1, dicoBest2
85
86 return LResDep,LResBC
87
88
90 """ implémentation de A* pour rechercher le meilleur chemin dans l'arbre des appariement entre blocs
91 Renvoie le noeud du dernier appariement """
92 openSet={}
93 closedSet={}
94 L1=L1Static
95 L2=L2Static
96 best=(-1,-1)
97 while True:
98 for posB1 in xrange(len(L1)):
99
100 for posB2 in dicoPos[2][L1[posB1]]:
101 if posB2<best[1]+1: continue
102 node=(posB1+best[0]+1,posB2)
103 cout=self.cout(best[0]+1,best[1]+1,posB1+best[0]+1,posB2,diffSym,coutFixe)
104 if closedSet.has_key(node):
105 if closedSet[node][0]>cout:
106 openSet[node]=(cout,best)
107 del closedSet[node]
108 elif openSet.has_key(node):
109 if openSet[node][0]>cout:
110 openSet[node]=(cout,best)
111 else: openSet[node]=(cout,best)
112 best=None
113 bestValue=None
114 for node in openSet:
115 if (best==None or openSet[node][0]<bestValue[0]):
116 best=node
117 bestValue=openSet[node]
118 L1=L1Static[best[0]+1:]
119 L2=L2Static[best[1]+1:]
120 del openSet[best]
121 closedSet[best]=bestValue
122 if (L1==[] or L2==[] or len(L1)+len(L2)==diffSym[(best[0],best[1])][1]):
123 return best,closedSet
124
125 - def cout(self,d1,d2,f1,f2,diffSym,coutFixe):
126 """ calcul du cout d'un noeud """
127 cF1=coutFixe[1][f1]-coutFixe[1][d1]
128 cF2=coutFixe[2][f2]-coutFixe[2][d2]
129 coutHeuristique=diffSym[(f1,f2)][0]
130
131 return cF1+cF2+coutHeuristique
132
134 try:
135 return diffSym[(f1,f2)]
136 except KeyError:
137 pass
138
140 diffSym={}
141 LH1=L1 ; LH2=L2
142 a='''LH1 = [] ; LH2 = []
143 for bloc in L1:
144 LH1.append((bloc[0], bloc[1]-bloc[1]))
145 logging.debug('init LH1 done')
146 for bloc in L2:
147 LH2.append((bloc[0], bloc[1]-bloc[1]))
148 logging.debug('init LH2 done')'''
149 logging.log(5,'len(LH2)= '+str(len(LH2))+' / len(LH1)= '+str(len(LH1)))
150
151 len_L1 = len(LH1) ; len_L2 = len(LH2)
152
153
154
155 class Mset_(set):
156 def evaluate(self):
157 res = 0
158 for cle,val in self:
159 res += val
160 return res
161 class Mset(dict):
162 def __init__(self, liste=[]):
163 assert isinstance(liste, list)
164 self.valeur = 0
165 self.length = 0
166 for (val,deb,fin) in liste:
167 try:
168 nb,longueur = self[val]
169 self[val] = nb+1,longueur
170 except KeyError:
171 self[val] = (1, fin-deb)
172 self.valeur += fin - deb
173 self.length += 1
174 def difference(self, mset2, renvoieReste=False):
175 assert isinstance(mset2, Mset)
176 res = Mset()
177 for cle,(nb, longueur) in self.iteritems():
178 if cle not in mset2:
179 res[cle] = (nb, longueur)
180 res.valeur += nb * longueur
181 res.length += nb
182 else:
183 (nb2, longueur2) = mset2[cle]
184 if nb > nb2:
185 res[cle] = (nb-nb2, longueur)
186 res.valeur += (nb-nb2) * longueur
187 res.length += nb-nb2
188 return res
189 def difference_liste(self, mset2, renvoieReste=False):
190 assert isinstance(mset2, list)
191 res = Mset()
192 for cle,(nb, longueur) in self.iteritems():
193 if cle not in mset2:
194 res[cle] = (nb, longueur)
195 res.valeur += nb * longueur
196 res.length += nb
197 else:
198 (nb2, longueur2) = mset2[cle]
199 if nb > nb2:
200 res[cle] = (nb-nb2, longueur)
201 res.valeur += (nb-nb2) * longueur
202 res.length += nb-nb2
203 return res
204 def intersection(self, mset2):
205 assert isinstance(mset2, Mset)
206 res = Mset()
207 if self.length <= mset2.length: grand = mset2 ; petit = self
208 else: grand = self ; petit = mset2
209 for cle,(nb, longueur) in petit.iteritems():
210 try:
211 (nb2, longueur2) = grand[cle]
212 if nb <= nb2: nbToAdd = nb
213 else: nbToAdd = nb2
214 res[cle] = (nbToAdd, longueur)
215 res.valeur += nbToAdd * longueur
216 res.length += nbToAdd
217 except KeyError:
218 pass
219 return res
220 i=0
221 for posB1 in xrange(len_L1-1,-1,-1):
222 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,'itération LH1: '+str(i))
223 LL1 = Mset(LH1[posB1+1:])
224 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done MSet1')
225 liste_pos2 = dicoPos[2][LH1[posB1][0]]
226 j = len(liste_pos2)-1
227 while j >= 0:
228 if j == len(liste_pos2)-1:
229 LL2 = Mset(LH2[liste_pos2[j]+1:]) ; ds = nb = 0
230 LL1res = LL1.difference(LL2)
231 LL2res = LL2.difference(LL1)
232 else:
233 posB2 = liste_pos2[j]
234 next_posB2 = liste_pos2[j+1]
235
236
237 new_LL1 = Mset(LH1[posB2+1:next_posB2+1])
238 LL1res = LL1res.difference(LL2res.intersection(new_LL1))
239 LL2res = LL2res.difference(new_LL1)
240
241 ds2 = LL1res.valeur + LL2res.valeur
242 nb2 = LL1res.length + LL2.length
243
244
245 diffSym[(posB1,liste_pos2[j])] = (ds2,nb2)
246 j -= 1
247 diffSym[(posB1,liste_pos2[0])] = (ds2,nb2)
248 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done itération interne')
249 i+=1
250 return diffSym
251
253 logging.log(5,'begin preTraitDiffSymVect')
254 diffSym={}
255 LH1=L1 ; LH2=L2
256 logging.log(5,'len(LH2)= '+str(len(LH2))+' / len(LH1)= '+str(len(LH1)))
257 dic_alphabet = {}
258 for cle,deb,fin in L1:
259 dic_alphabet[cle] = fin-deb
260 for cle,deb,fin in L2:
261 dic_alphabet[cle] = fin-deb
262 taille_alphabet = len(dic_alphabet)
263 temp = []
264 for cle in dic_alphabet:
265 bisect.insort_right(temp, cle)
266 array_pos_alphabet = numarray.array(temp)
267 assert len(array_pos_alphabet) == taille_alphabet
268 array_valeur_alphabet = numarray.zeros(taille_alphabet)
269 for pos in xrange(taille_alphabet):
270 cle = array_pos_alphabet[pos]
271 val = dic_alphabet[cle]
272
273 dic_alphabet[cle] = pos
274 array_valeur_alphabet[pos] = val
275
276 LH1 = [cle for cle,deb,fin in LH1]
277 LHH1 = [dic_alphabet[cle] for cle in LH1]
278
279
280
281
282
283
284
285
286
287
288
289
290
291 LH2 = [cle for cle,deb,fin in LH2]
292 LHH2 = [dic_alphabet[cle] for cle in LH2]
293
294
295
296
297
298
299
300
301
302 logging.log(4,'LH1 = '+str(LH1))
303 logging.log(4,'LH2 = '+str(LH2))
304 logging.log(4,'array_pos_alphabet = '+str(array_pos_alphabet))
305 logging.log(4,'array_valeur_alphabet = '+str(array_valeur_alphabet))
306 logging.log(4,'dic_alphabet = '+str(dic_alphabet))
307
308
309
310
311 logging.log(5,'creation alphabet done')
312
313 current = numarray.zeros(taille_alphabet)
314 accu = numarray.zeros(taille_alphabet)
315 def liste_to_alphab_array(liste,array):
316
317
318
319
320
321
322
323
324
325 numarray.logical_and(current,array_zero, current)
326 numarray.logical_and(accu,array_zero, accu)
327 for i in xrange(len(liste)-1,-1,-1):
328 pos = LHH2[i]
329 numarray.where(numarray.arange(taille_alphabet)==pos,1,0,out=current)
330 numarray.add(accu,current,accu)
331 array = accu
332
333
334
335
336
337
338 assert len(liste) == numarray.sum(array),str(len(liste))+' / '+str(numarray.sum(array))
339
340 def alphab_array_val_length(array):
341 val = length = 0
342 for pos in xrange(taille_alphabet):
343 nb = max(0,array[pos])
344 length += nb
345 val += nb * array_valeur_alphabet[pos]
346
347
348 assert 0 <= length <= val
349
350
351
352 return val, length
353
354 len_L1 = len(LH1) ; len_L2 = len(LH2)
355 i=0
356 array_zero = numarray.zeros(taille_alphabet)
357 array_inter = numarray.zeros(taille_alphabet)
358 LL1 = numarray.zeros(taille_alphabet)
359 LL2 = numarray.zeros(taille_alphabet)
360 LL1res = numarray.zeros(taille_alphabet)
361 LL2res = numarray.zeros(taille_alphabet)
362 new_LL1 = numarray.zeros(taille_alphabet)
363 for posB1 in xrange(len_L1-1,-1,-1):
364 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,'itération LH1: '+str(i))
365 if i == 300: break
366
367 numarray.logical_and(LL1,array_zero, LL1)
368 assert numarray.sum(LL1) == 0
369 liste_to_alphab_array(LHH1[posB1+1:], LL1)
370
371
372
373
374 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done MSet1')
375
376 liste_pos2 = dicoPos[2][LH1[posB1]]
377 j = len(liste_pos2)-1
378 while j >= 0:
379
380
381
382
383
384 if j == len(liste_pos2)-1:
385 numarray.logical_and(LL2,array_zero, LL2)
386 numarray.logical_and(LL1res,array_zero, LL1res)
387 numarray.logical_and(LL2res,array_zero, LL2res)
388 assert numarray.sum(LL2) == numarray.sum(LL1res) == numarray.sum(LL2res) == 0
389
390 liste_to_alphab_array(LHH2[liste_pos2[j]+1:], LL2) ; ds = nb = 0
391
392
393
394
395
396 numarray.subtract(LL1,LL2, LL1res)
397
398
399 numarray.subtract(LL2,LL1, LL2res)
400
401
402
403 else:
404 posB2 = liste_pos2[j]
405 next_posB2 = liste_pos2[j+1]
406 numarray.logical_and(new_LL1,array_zero, new_LL1)
407 numarray.logical_and(array_inter,array_zero, array_inter)
408 assert numarray.sum(new_LL1) == numarray.sum(array_inter) == 0
409
410 liste_to_alphab_array(LHH1[posB2+1:next_posB2+1], new_LL1)
411
412
413
414
415
416
417 numarray.minimum(LL2res,new_LL1,array_inter)
418
419
420 numarray.subtract(LL1res,array_inter, LL1res)
421
422
423 numarray.subtract(LL2res,new_LL1, LL2res)
424
425
426
427
428
429
430 d1,n1 = alphab_array_val_length(LL1res)
431 d2,n2 = alphab_array_val_length(LL2res)
432 ds2 = d1 + d2 ; nb2 = n1 + n2
433 diffSym[(posB1,liste_pos2[j])] = (ds2,nb2)
434 j -= 1
435 diffSym[(posB1,liste_pos2[0])] = (ds2,nb2)
436 if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done itération interne')
437 i+=1
438
439 return diffSym
440
442 r='''class dicH(weakref.WeakKeyDictionary):
443 def __init__(self, dico=None):
444 if dico is None:
445 self.totalItem = 0
446 self.valeur = 0
447 else:
448 for cle,nbItem in dico.iteritems():
449 self[cle] = nbItem #[pos for pos in liste_pos]
450 self.totalItem = dico.totalItem
451 self.valeur = dico.valeur'''
452 def _addBloc(bloc):
453 (cle, debut, fin) = bloc
454 try:
455
456 dico[cle] += 1
457 except KeyError:
458 dico[cle] = 1
459 dico['valeur'] += fin - debut
460 dico['totalItem'] += 1
461 def _testBloc(bloc):
462 (cle, debut, fin) = bloc
463 try:
464
465
466
467
468
469 if diffSymCourante[cle] == 1:
470 del diffSymCourante[cle]
471 else:
472 diffSymCourante[cle] -= 1
473 diffSymCourante['valeur'] -= fin - debut
474 diffSymCourante['totalItem'] -= 1
475 except KeyError:
476 diffSymCourante[cle] = 1
477 diffSymCourante['valeur'] += fin - debut
478 diffSymCourante['totalItem'] += 1
479
480 diffSym={}
481
482 LH1 = [] ; LH2 = []
483 for bloc in L1:
484 LH1.append((hash(texte[bloc[0]:bloc[1]]), bloc[0], bloc[1]))
485 logging.debug('init LH1 done')
486 for bloc in L2:
487 LH2.append((hash(texte[bloc[0]:bloc[1]]), bloc[0], bloc[1]))
488 logging.debug('init LH2 done')
489
490 dico = {'valeur':0, 'totalItem':0}
491
492
493
494
495 ld=i=0
496 logging.debug('len(LH2)= '+str(len(LH2))+' / len(LH1)= '+str(len(LH1)))
497 for pos2 in xrange(len(LH2)-1,-1,-1):
498 if (i<10 or i>len(LH2)-10 or i%10==0): logging.debug('itération LH2: '+str(i));k=True
499 else:k=False
500 if pos2 < (len(LH2)-1):
501 _addBloc(LH2[pos2+1])
502 assert dico['totalItem'] == ld + 1 ; ld+=1
503
504 if pos2 > 0: diffSymCourante = weakref.WeakKeyDictionary(dico.copy())
505 else: diffSymCourante = dico
506 logging.debug(' copie dico done')
507 j=0
508 for pos1 in xrange(len(LH1)-1,-1,-1):
509
510 if pos1 < (len(LH1)-1): _testBloc(LH1[pos1+1])
511 diffSym[(pos1,pos2)] = (diffSymCourante['valeur'],diffSymCourante['totalItem'])
512 j+=1
513 logging.debug(' itération interne done')
514 i+=1
515
516 a='''for pos2 in xrange(len(LH2)-1,-1,-1):
517 if pos2 < len(LH2)-1: _testBloc(LH2[pos2+1], True)
518 diffSymCourante = dico.copy()
519 prev_taille = diffSymCourante['totalItem']
520 for pos1 in xrange(len(LH1)):
521 if pos1 < len(LH1)-1: _testBloc(LH1[pos1+1])
522 diffSym[(pos1,pos2)] = (diffSymCourante['valeur'],diffSymCourante['totalItem'])
523 #assert diffSymCourante['totalItem'] < prev_taille, str(diffSymCourante['totalItem'])+' '+str(prev_taille)'''
524 assert len(diffSym) == len(L1) * len(L2), str(len(diffSym))+' '+str(len(L1))+' '+str(len(L2))
525
526
527
528
529
530
531 return diffSym
532
533
535 def _addBloc(bloc):
536 cle = hash(texte[bloc[0]:bloc[1]])
537 try:
538 dico[cle].append(bloc[0])
539 except KeyError:
540 dico[cle] = [bloc[0]]
541 dico['valeur'] += bloc[1] - bloc[0]
542 dico['nbItem'] += 1
543 diffSym={}
544
545 L = {1:L1, 2:L2}
546 tabDiffSym = {}
547 dico = {'valeur':0, 'nbItem':0}
548 tabDiffSym[(len(L1),-1)] = {}
549 for i in xrange(len(L1)-1,-1,-1):
550 _addBloc(L1[i])
551 dico_courant = dico.copy()
552 tabDiffSym[(i,-1)] = dico_courant
553 assert dico['nbItem'] == len(L1)
554
555 tabDiffSym[(len(L1),-1)] = {'valeur':0, 'nbItem':0} ; bloc = {}
556 for j in xrange(len(L2)-1,-1,-1):
557 bloc[2] = L2[j]
558 for i in xrange(len(L1)-1,-1,-1):
559 bloc[1] = L1[i]
560 diffSymCourante = tabDiffSym[(i+1,-1)]
561
562 for k in [1,2]:
563 cle = hash(texte[bloc[k][0]:bloc[k][1]])
564 try:
565 liste_pos_bloc = diffSymCourante[cle]
566 try:
567 liste_pos_bloc.pop()
568 except IndexError:
569 del diffSymCourante[cle]
570 diffSymCourante['valeur'] -= bloc[k][1] - bloc[k][0]
571 diffSymCourante['nbItem'] -= 1
572 except KeyError:
573 diffSymCourante[cle] = [bloc[k][0]]
574 diffSymCourante['valeur'] += bloc[k][1] - bloc[k][0]
575 diffSymCourante['nbItem'] += 1
576 tabDiffSym[(i,0)] = diffSymCourante
577 diffSym[(i,j)] = (diffSymCourante['valeur'],diffSymCourante['nbItem'])
578 for i in xrange(len(L1)):
579 tabDiffSym[(i,-1)] = tabDiffSym[(i,0)]
580 del tabDiffSym[(i,0)]
581 dico = tabDiffSym[(len(L1),-1)]
582 _addBloc(L2[j])
583 tabDiffSym[(len(L1),-1)] = dico
584 assert len(tabDiffSym) == len(L1)+1
585 assert len(diffSym) == len(L1) * len(L2),str(len(diffSym))+str(len(L1))+str(len(L2))
586 return diffSym
587
589 """ Précalcul de toutes les différences symétriques possibles.
590 Ainsi chacune est calculée une seule fois et non un nombre combinatoire de fois si on faisait le calcul à la demande
591 pre: forall([texte[x[0]:x[1]] in dicoPos[1].keys() for x in L1])
592 forall([texte[x[0]:x[1]] in dicoPos[2].keys() for x in L1])
593 forall([texte[x[0]:x[1]] in dicoPos[1].keys() for x in L2])
594 forall([texte[x[0]:x[1]] in dicoPos[2].keys() for x in L2])
595 len(self.tool.difference(L1,L2))>0
596 """
597 diffSym={}
598
599 posB1=0
600 for B1 in L1:
601 for posB2 in dicoPos[2][texte[B1[0]:B1[1]]]:
602 L=self.difference_symetrique(L1[posB1+1:],L2[posB2+1:],texte,posB1,posB2,dicoPos)
603 cout=0
604 for B in L:
605 cout += B[1]-B[0]
606 diffSym[(posB1,posB2)]=(cout,len(L))
607 posB1+=1
608 assert len(diffSym) <= len(L1) * len(L2),str(len(diffSym))+' '+str(len(L1))+' '+str(len(L2))
609 return diffSym
610
612 """ différence symétrique entre 2 listes de blocs
613 pre: forall([texte[x[0]:x[1]] in dicoPos[1].iterkeys() for x in L1])
614 forall([texte[x[0]:x[1]] in dicoPos[2].iterkeys() for x in L1])
615 forall([texte[x[0]:x[1]] in dicoPos[1].iterkeys() for x in L2])
616 forall([texte[x[0]:x[1]] in dicoPos[2].iterkeys() for x in L2])
617 len(self.tool.difference(L1,L2))>0
618 forall(L1, lambda x:texte[x[0]:x[1]] in dicoPos[1].iterkeys())
619 forall(L1, lambda x:texte[x[0]:x[1]] in dicoPos[2].iterkeys())
620 forall(L2, lambda x:texte[x[0]:x[1]] in dicoPos[1].iterkeys())
621 forall(L2, lambda x:texte[x[0]:x[1]] in dicoPos[2].iterkeys())
622 """
623 if (len(L1)==0 and len(L2)==0): return []
624 elif (len(L1)==0): return L2
625 elif (len(L2)==0): return L1
626 LRes=[]
627 for B1 in L1:
628 found=False
629 for posB2 in dicoPos[2][texte[B1[0]:B1[1]]]:
630 if posB2>=deb2+1: found=True
631 if not found: LRes.append(B1)
632 for B2 in L2:
633 found=False
634 for posB1 in dicoPos[1][texte[B2[0]:B2[1]]]:
635 if posB1>=deb1+1: found=True
636 if not found: LRes.append(B2)
637
638 return LRes
639
641 """ Alignement A* entre les 2 séquences
642 pre: isinstance(L1,list) and isinstance(L2,list) and isinstance(texte1,str) and isinstance(texte2,str)
643 post: (len(__return__[0])==len(__return__[1])) or (len(__return__[0])==len(__return__[1])+1) or \
644 (len(__return__[0])+1==len(__return__[1]))"""
645
646 logging.log(5,"debut _appelAstar")
647 LC1 = [] ; LC2 = [] ; Lcf1 = [] ; Lcf2 = [] ; LH1 = [] ; LH2 = []
648 for bloc in L1:
649 cle = hash(texte1[bloc[0]:bloc[1]])
650 LC1.append(cle)
651 Lcf1.append((cle,bloc[1]-bloc[0]))
652 LH1.append((cle, bloc[0], bloc[1]))
653
654 logging.log(5,'init L1 done')
655 for bloc in L2:
656 cle = hash(texte2[bloc[0]-lt1:bloc[1]-lt1])
657 LC2.append(cle)
658 Lcf2.append((cle,bloc[1]-bloc[0]))
659 LH2.append((cle, bloc[0], bloc[1]))
660 logging.log(5,'init L2 done')
661 dicoPos = {}
662 coutFixe = {}
663 dicoPos[1], coutFixe[1] = self.calcPosCoutFixe(Lcf1)
664 dicoPos[2], coutFixe[2] = self.calcPosCoutFixe(Lcf2)
665 if __debug__:
666 for cle in dicoPos[1]: assert cle in dicoPos[2], str([texte1[d:f] for d,f in L1])+' '+str([texte2[d-lt1:f-lt1] for d,f in L2])
667 for cle in dicoPos[2]: assert cle in dicoPos[1]
668 dic = {} ; tri = []
669 for cle,deb,fin in LH1:
670 try:
671 val,lpos = dic[cle]
672 lpos.append((deb,fin))
673 dic[cle] = val+1, lpos
674 except KeyError: dic[cle] = 1 , [(deb,fin)]
675 for cle,(nb,lpos) in dic.iteritems():
676 bisect.insort_right(tri,(nb,lpos))
677 for nb,lpos in tri[-20:]:
678 deb = lpos[0][0] ; fin =lpos[0][1]
679 logging.debug('LH1: nb = '+str(nb)+' / '+texte1[deb:fin])
680 dic = {} ; tri = []
681 for cle,deb,fin in LH2:
682 try:
683 val,lpos = dic[cle]
684 lpos.append((deb,fin))
685 dic[cle] = val+1, lpos
686 except KeyError: dic[cle] = 1 , [(deb,fin)]
687 for cle,(nb,lpos) in dic.iteritems():
688 bisect.insort_right(tri,(nb,lpos))
689 for nb,lpos in tri[-20:]:
690 deb = lpos[0][0] ; fin =lpos[0][1]
691 logging.debug('LH2: nb = '+str(nb)+' / '+texte2[deb-lt1:fin-lt1])
692
693 tri = []
694 for cle,liste in dicoPos[1].iteritems():
695 bisect.insort_right(tri,len(liste))
696 logging.debug('tri liste1 = '+str(tri))
697 logging.debug("longueur liste1 = %d, moyenne = %.2f, mediane = %d",len(tri),+(0.0+sum(tri))/len(tri),tri[len(tri)/2])
698 tri = []
699 for cle,liste in dicoPos[2].iteritems():
700 bisect.insort_right(tri,len(liste))
701 logging.debug('tri liste2 = '+str(tri))
702 logging.debug('longueur liste2 = %d, moyenne = %.2f, mediane = %d',len(tri),(0.0+sum(tri))/len(tri),tri[len(tri)/2])
703 logging.log(5,"fin calcPosCoutFixe")
704
705 diffSym = self.preTraitDiffSym(LH1,LH2,dicoPos)
706
707
708
709
710
711 logging.log(5,"fin preTraitDiffSym")
712
713
714
715
716
717
718
719 best, closedSet = self.deplacements_pond_Astar(LC1,LC2,diffSym,dicoPos,coutFixe)
720 logging.log(5,"fin deplacements_pond_Astar")
721 dicoBest={1:{}, 2:{}}
722 while best != (-1,-1):
723 dicoBest[1][best[0]]=1
724 dicoBest[2][best[1]]=1
725 best=closedSet[best][1]
726
727 s1=self.__makeLRes(dicoBest[1],L1)
728 s2=self.__makeLRes(dicoBest[2],L2)
729 logging.debug('s1 = '+str(s1))
730 logging.debug('s2 = '+str(s2))
731 if __debug__:
732 s11=[]
733 s12=[]
734 for i in xrange(len(s1)):
735 s11.append(s1[i][0])
736 s12.extend(s1[i][1])
737 if s11[-1] is None: s11=s11[:-1]
738 s21=[]
739 s22=[]
740 for i in xrange(len(s2)):
741 s21.append(s2[i][0])
742 s22.extend(s2[i][1])
743 if s21[-1] is None: s21=s21[:-1]
744 self.ass2__(s11,0,lt1,texte1)
745 self.ass2__(s12,0,lt1,texte1)
746 texte=texte1+texte2
747 self.ass2__(s21,lt1,len(texte1)+len(texte2),texte)
748 self.ass2__(s22,lt1,len(texte1)+len(texte2),texte)
749 self.ass2__(s11+s21,0,len(texte1)+len(texte2),texte)
750 self.ass2__(s12+s22,0,len(texte1)+len(texte2),texte)
751 del dicoPos, coutFixe, dicoBest
752 logging.log(5,"fin _appelAstar")
753 return s1,s2
754