# -*- coding: iso-8859-1 -*- # Copyright 20003 - 2008: Julien Bourdaillet (julien.bourdaillet@lip6.fr), Jean-Gabriel Ganascia (jean-gabriel.ganascia@lip6.fr) # This file is part of MEDITE. # # MEDITE is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # MEDITE is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Foobar; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA class AlignAstar2(Align): """ texte passé en paramètre des fonctions """ def calcPosCoutFixe(self, L): #, texte): """ Calcule des dictionnaires des positions et des couts fixes pre: isinstance(L,list) len(L)>0 #post: forall([texte[x[0]:x[1]] in __return__[0].keys() for x in L]) """ dicoPos = {} coutFixe = {} cumul=0 for i in xrange(len(L)): #chaine = texte[L[i][0]:L[i][1]] #longueur = L[i][1]-L[i][0] chaine, longueur = L[i] try: dicoPos[chaine].append(i) # ajout de sa position except KeyError: dicoPos[chaine] = [i] coutFixe[i]=cumul #cumul des couts (leur longueur) des blocs précédents cumul+=longueur #L[i][1]-L[i][0] return dicoPos,coutFixe def deplacements_pond(self, Lv1, Lv2): L1 = [] ; L2 = [] ; Lcf1 = [] ; Lcf2 = [] ; LH1 = [] ; LH2 = [] for bloc in Lv1: cle = hash(self.texte[bloc[0]:bloc[1]]) L1.append(cle) Lcf1.append((cle,bloc[1]-bloc[0])) LH1.append((cle, bloc[0], bloc[1])) print LH1 logging.debug('init L1 done') for bloc in Lv2: cle = hash(self.texte[bloc[0]:bloc[1]]) L2.append(cle) Lcf2.append((cle,bloc[1]-bloc[0])) LH2.append((cle, bloc[0], bloc[1])) logging.debug('init L2 done') # dicoPos1 stocke pour chaque bloc de L1 ses positions dans cette liste dicoPos = {} coutFixe = {} dicoPos[1], coutFixe[1] = self.calcPosCoutFixe(Lcf1,self.texte) dicoPos[2], coutFixe[2] = self.calcPosCoutFixe(Lcf2,self.texte) for cle in dicoPos[1]: assert cle in dicoPos[2] for cle in dicoPos[2]: assert cle in dicoPos[1] diffSym = self.preTraitDiffSym(LH1,LH2,self.texte,dicoPos) # pré-calcul des différences symétriques best, closedSet = self.deplacements_pond_Astar(L1,L2,self.texte,diffSym,dicoPos,coutFixe) # A* dicoBest1={} # dico des positions dans L1 du meilleur parcours dicoBest2={} # dico des positions dans L2 du meilleur parcours while best != (-1,-1): # on remonte le meilleur parcours trouvé dicoBest1[best[0]]=1 dicoBest2[best[1]]=1 best=closedSet[best][1] # noeud père LResDep=[] LResBC=[] for i in xrange(len(L1)): if not dicoBest1.has_key(i):LResDep.append(L1[i]) else:LResBC.append(L1[i]) for i in xrange(len(L2)): if not dicoBest2.has_key(i):LResDep.append(L2[i]) else:LResBC.append(L2[i]) del dicoPos, coutFixe, dicoBest1, dicoBest2 return LResDep,LResBC def deplacements_pond_Astar(self, L1Static, L2Static, diffSym, dicoPos, coutFixe): """ implémentation de A* pour rechercher le meilleur chemin dans l'arbre des appariement entre blocs Renvoie le noeud du dernier appariement """ openSet={} # ensemble des sommets ouverts pour A* closedSet={} # ensemble des sommets fermés pour A* L1=L1Static # liste L1 courante L2=L2Static best=(-1,-1) while True: for posB1 in xrange(len(L1)): #for posB2 in dicoPos[2][texte[L1[posB1][0]:L1[posB1][1]]]: for posB2 in dicoPos[2][L1[posB1]]: if posB2cout: openSet[node]=(cout,best) del closedSet[node] elif openSet.has_key(node): if openSet[node][0]>cout: openSet[node]=(cout,best) else: openSet[node]=(cout,best) best=None # noeud (posL1, posL2) bestValue=None # valeur (cout,pere) for node in openSet: # recherche du noeud de moindre cout if (best==None or openSet[node][0]len_L1-10 or i%100==0): logging.log(5,' done MSet1') liste_pos2 = dicoPos[2][LH1[posB1][0]]#texte[L1[posB1][0]:L1[posB1][1]]] j = len(liste_pos2)-1 while j >= 0: if j == len(liste_pos2)-1: LL2 = Mset(LH2[liste_pos2[j]+1:]) ; ds = nb = 0 LL1res = LL1.difference(LL2) LL2res = LL2.difference(LL1) else: posB2 = liste_pos2[j] next_posB2 = liste_pos2[j+1] #(ds,nb,LL1,LL2) = diffSym[(posB1,next_posB2)] #diffSym[(posB1,next_posB2)] = (ds,nb) new_LL1 = Mset(LH1[posB2+1:next_posB2+1]) LL1res = LL1res.difference(LL2res.intersection(new_LL1)) LL2res = LL2res.difference(new_LL1) ds2 = LL1res.valeur + LL2res.valeur nb2 = LL1res.length + LL2.length #ds2 = LL1res.evaluate() + LL2res.evaluate() #nb2 = len(LL1res) + len(LL2res) diffSym[(posB1,liste_pos2[j])] = (ds2,nb2)#,LL1res, LL2res) j -= 1 diffSym[(posB1,liste_pos2[0])] = (ds2,nb2) if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done itération interne') i+=1 return diffSym def preTraitDiffSymVect(self,L1,L2,dicoPos,niveau=0): logging.log(5,'begin preTraitDiffSymVect') diffSym={} # dico[(i,j)] stocke le cout heuristique et la longueur de la liste résultant de la différence symétrique entre L1[i+1:] et L2[j+1:] LH1=L1 ; LH2=L2 logging.log(5,'len(LH2)= '+str(len(LH2))+' / len(LH1)= '+str(len(LH1))) dic_alphabet = {} for cle,deb,fin in L1: dic_alphabet[cle] = fin-deb for cle,deb,fin in L2: dic_alphabet[cle] = fin-deb taille_alphabet = len(dic_alphabet) temp = [] for cle in dic_alphabet: bisect.insort_right(temp, cle) array_pos_alphabet = numarray.array(temp) # ordre des lettres de l'alphabet assert len(array_pos_alphabet) == taille_alphabet array_valeur_alphabet = numarray.zeros(taille_alphabet) # poids de chaque lettre for pos in xrange(taille_alphabet):# MAJ dico avec position cle = array_pos_alphabet[pos] val = dic_alphabet[cle] #dic_alphabet[cle] = val,pos dic_alphabet[cle] = pos array_valeur_alphabet[pos] = val LH1 = [cle for cle,deb,fin in LH1] LHH1 = [dic_alphabet[cle] for cle in LH1] #logging.debug('len(LHH1)=%d, taille_alphabet=%d,len(LHH1)*taille_alphabet=%d',len(LHH1), taille_alphabet,len(LHH1)*taille_alphabet) #array_LH1 = numarray.zeros((len(LHH1),taille_alphabet),numarray.Int8) #current = numarray.zeros(taille_alphabet) #accu = numarray.zeros(taille_alphabet) #logging.debug('i=%d',len(LHH1)) #for i in xrange(len(LHH1)-1,-1,-1): # if i%1000 ==0: logging.debug('i=%d',i) # pos = LHH1[i] # numarray.where(numarray.arange(taille_alphabet)==pos,1,0,out=current) # numarray.add(accu,current,accu) # array_LH1[i] = accu #logging.debug('array_LH1 done') LH2 = [cle for cle,deb,fin in LH2] LHH2 = [dic_alphabet[cle] for cle in LH2] #array_LH2 = numarray.zeros((len(LHH2),taille_alphabet),numarray.Int8) #current = numarray.zeros(taille_alphabet) #accu = numarray.zeros(taille_alphabet) #for i in xrange(len(LHH2)-1,-1,-1): # pos = LHH2[i] # numarray.where(numarray.arange(taille_alphabet)==pos,1,0,out=current) # numarray.add(accu,current,accu) # array_LH2[i] = accu #logging.debug('array_LH2 done') logging.log(4,'LH1 = '+str(LH1)) logging.log(4,'LH2 = '+str(LH2)) logging.log(4,'array_pos_alphabet = '+str(array_pos_alphabet)) logging.log(4,'array_valeur_alphabet = '+str(array_valeur_alphabet)) logging.log(4,'dic_alphabet = '+str(dic_alphabet)) #for pos in xrange(taille_alphabet): # cle = array_pos_alphabet[pos] # val = dic_alphabet[cle] # array_valeur_alphabet[pos] = val logging.log(5,'creation alphabet done') current = numarray.zeros(taille_alphabet) accu = numarray.zeros(taille_alphabet) def liste_to_alphab_array(liste,array): #c=0 #for cle,deb,fin in liste: #for cle in liste: #for pos in liste: #val,pos = dic_alphabet[cle] #array[pos] += 1 #array[dic_alphabet[cle]] += 1 # array[pos] += 1 #c +=1 numarray.logical_and(current,array_zero, current) numarray.logical_and(accu,array_zero, accu) for i in xrange(len(liste)-1,-1,-1): pos = LHH2[i] numarray.where(numarray.arange(taille_alphabet)==pos,1,0,out=current) numarray.add(accu,current,accu) array = accu #map(lambda pos:array[pos]=array[pos]+1,liste) #for p in [dic_alphabet[cle] for cle in liste]: array[p] +=1 #j=0 #for i in array: assert i>=0,i ; j+=i #print Numeric.sum(array), numarray.add.reduce(array),j #assert len(liste) == c assert len(liste) == numarray.sum(array),str(len(liste))+' / '+str(numarray.sum(array)) def alphab_array_val_length(array): val = length = 0 for pos in xrange(taille_alphabet): nb = max(0,array[pos]) length += nb val += nb * array_valeur_alphabet[pos] #if nb>0: assert val>0 #assert length == numarray.sum(array) assert 0 <= length <= val #if val == 1769: # print liste # print array return val, length len_L1 = len(LH1) ; len_L2 = len(LH2) i=0 array_zero = numarray.zeros(taille_alphabet)#,numarray.UInt8) array_inter = numarray.zeros(taille_alphabet)#,numarray.UInt8) LL1 = numarray.zeros(taille_alphabet)#,numarray.UInt8) LL2 = numarray.zeros(taille_alphabet)#,numarray.UInt8) LL1res = numarray.zeros(taille_alphabet)#,numarray.UInt8) LL2res = numarray.zeros(taille_alphabet)#,numarray.UInt8) new_LL1 = numarray.zeros(taille_alphabet)#,numarray.UInt8) for posB1 in xrange(len_L1-1,-1,-1): if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,'itération LH1: '+str(i)) if i == 300: break #LL1 = numarray.zeros(taille_alphabet,numarray.Int8) numarray.logical_and(LL1,array_zero, LL1) assert numarray.sum(LL1) == 0 liste_to_alphab_array(LHH1[posB1+1:], LL1) #LL1 = array_LH1[posB1+1] #for pos in LHH1[posB1+1:]: # LL1[pos] += 1 #logging.log(4,'LL1 = '+str(LL1)) if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done MSet1') #liste_pos2 = dicoPos[2][LH1[posB1][0]]#texte[L1[posB1][0]:L1[posB1][1]]] liste_pos2 = dicoPos[2][LH1[posB1]] j = len(liste_pos2)-1 while j >= 0: #posB2 = liste_pos2[j] #LL2 = array_LH2[posB2] #liste_to_alphab_array(LHH2[posB2+1:], LL2) #numarray.subtract(LL1,LL2, LL1res) #numarray.subtract(LL2,LL1, LL2res) if j == len(liste_pos2)-1: numarray.logical_and(LL2,array_zero, LL2) numarray.logical_and(LL1res,array_zero, LL1res) numarray.logical_and(LL2res,array_zero, LL2res) assert numarray.sum(LL2) == numarray.sum(LL1res) == numarray.sum(LL2res) == 0 #LL2 = liste_to_alphab_array(LHH2[liste_pos2[j]+1:], LL2) ; ds = nb = 0 #LL2 = array_LH2[liste_pos2[j]+1] #for pos in LHH2[liste_pos2[j]+1:]: # LL2[pos] += 1 #logging.log(4,'LL2 = '+str(LL2)) #LL1res = numarray.subtract(LL1,LL2, LL1res) #numarray.maximum(LL1res,array_zero,LL1res) #LL2res = numarray.subtract(LL2,LL1, LL2res) #numarray.maximum(LL2res,array_zero,LL2res) #logging.log(4,'LL1res = '+str(LL1res)) #logging.log(4,'LL2res = '+str(LL2res)) else: posB2 = liste_pos2[j] next_posB2 = liste_pos2[j+1] numarray.logical_and(new_LL1,array_zero, new_LL1) numarray.logical_and(array_inter,array_zero, array_inter) assert numarray.sum(new_LL1) == numarray.sum(array_inter) == 0 #new_LL1 = liste_to_alphab_array(LHH1[posB2+1:next_posB2+1], new_LL1) #new_LL1 = array_LH1[liste_pos2[j]+1] #for pos in LHH1[posB2+1:next_posB2+1]: # new_LL1[pos] += 1 # new_LL1 = Mset(LH1[posB2+1:next_posB2+1]) # LL1res = LL1res.difference(LL2res.intersection(new_LL1)) # LL2res = LL2res.difference(new_LL1) numarray.minimum(LL2res,new_LL1,array_inter) #numarray.maximum(array_inter,array_zero,array_inter) numarray.subtract(LL1res,array_inter, LL1res) #numarray.maximum(LL1res,array_zero,LL1res) numarray.subtract(LL2res,new_LL1, LL2res) #numarray.maximum(LL2res,array_zero,LL2res) #logging.log(4,'array_inter = '+str(array_inter)) #logging.log(4,'LL1res = '+str(LL1res)) #logging.log(4,'LL2res = '+str(LL2res)) #ds2 = LL1res.valeur + LL2res.valeur #nb2 = LL1res.length + LL2.length d1,n1 = alphab_array_val_length(LL1res) d2,n2 = alphab_array_val_length(LL2res) ds2 = d1 + d2 ; nb2 = n1 + n2 diffSym[(posB1,liste_pos2[j])] = (ds2,nb2)#,LL1res, LL2res) j -= 1 diffSym[(posB1,liste_pos2[0])] = (ds2,nb2) if (i<10 or i>len_L1-10 or i%100==0): logging.log(5,' done itération interne') i+=1 #logging.debug('diffSym = '+str(diffSym)) return diffSym def preTraitDiffSym2(self,L1,L2,texte,dicoPos,niveau=0): r='''class dicH(weakref.WeakKeyDictionary): def __init__(self, dico=None): if dico is None: self.totalItem = 0 self.valeur = 0 else: for cle,nbItem in dico.iteritems(): self[cle] = nbItem #[pos for pos in liste_pos] self.totalItem = dico.totalItem self.valeur = dico.valeur''' def _addBloc(bloc): (cle, debut, fin) = bloc try: #(nbItem, valeur) = self[cle] dico[cle] += 1 #(nbItem+1,valeur) except KeyError: dico[cle] = 1 #(1, fin-debut) #[debut] dico['valeur'] += fin - debut dico['totalItem'] += 1 def _testBloc(bloc): (cle, debut, fin) = bloc try: #(nbItem, valeur) = self[cle] # liste_pos_bloc = self[cle] #liste_pos_bloc.pop() #if len(liste_pos_bloc) == 0: # del self[cle] if diffSymCourante[cle] == 1: del diffSymCourante[cle] else: diffSymCourante[cle] -= 1 #(nbItem-1, valeur) diffSymCourante['valeur'] -= fin - debut diffSymCourante['totalItem'] -= 1 except KeyError: diffSymCourante[cle] = 1 #[debut] diffSymCourante['valeur'] += fin - debut diffSymCourante['totalItem'] += 1 diffSym={} # dico[(i,j)] stocke le cout heuristique et la longueur de la liste résultant # de la différence symétrique entre L1[i+1:] et L2[j+1:] LH1 = [] ; LH2 = [] for bloc in L1: LH1.append((hash(texte[bloc[0]:bloc[1]]), bloc[0], bloc[1])) logging.debug('init LH1 done') for bloc in L2: LH2.append((hash(texte[bloc[0]:bloc[1]]), bloc[0], bloc[1])) logging.debug('init LH2 done') dico = {'valeur':0, 'totalItem':0} #{} #dicH() # #dico.valeur = 0 ; dico.totalItem = 0 #for i in xrange(2,len(LH1)-1): # _addBloc(LH1[i+1]) #assert dico['totalItem'] == len(L1)-3 ld=i=0 logging.debug('len(LH2)= '+str(len(LH2))+' / len(LH1)= '+str(len(LH1))) for pos2 in xrange(len(LH2)-1,-1,-1): if (i<10 or i>len(LH2)-10 or i%10==0): logging.debug('itération LH2: '+str(i));k=True else:k=False if pos2 < (len(LH2)-1): _addBloc(LH2[pos2+1]) assert dico['totalItem'] == ld + 1 ; ld+=1 #print 'totalItem=',dico#['totalItem'] if pos2 > 0: diffSymCourante = weakref.WeakKeyDictionary(dico.copy()) else: diffSymCourante = dico logging.debug(' copie dico done') j=0 for pos1 in xrange(len(LH1)-1,-1,-1): #if k and (j<10 or j>len(LH1)-10 or j%10==0): logging.debug(' itération LH1: '+str(j)) if pos1 < (len(LH1)-1): _testBloc(LH1[pos1+1]) diffSym[(pos1,pos2)] = (diffSymCourante['valeur'],diffSymCourante['totalItem']) j+=1 logging.debug(' itération interne done') i+=1 a='''for pos2 in xrange(len(LH2)-1,-1,-1): if pos2 < len(LH2)-1: _testBloc(LH2[pos2+1], True) diffSymCourante = dico.copy() prev_taille = diffSymCourante['totalItem'] for pos1 in xrange(len(LH1)): if pos1 < len(LH1)-1: _testBloc(LH1[pos1+1]) diffSym[(pos1,pos2)] = (diffSymCourante['valeur'],diffSymCourante['totalItem']) #assert diffSymCourante['totalItem'] < prev_taille, str(diffSymCourante['totalItem'])+' '+str(prev_taille)''' assert len(diffSym) == len(L1) * len(L2), str(len(diffSym))+' '+str(len(L1))+' '+str(len(L2)) #str1 = '' ; str2 = '' #for i in xrange(len(L1)): # str1 += str(diffSym[(i,0)]) +' ' # str2 += str(diffSym[(i,len(LH2)-1)]) +' ' #print str1 #print str2 return diffSym def preTraitDiffSym__(self,L1,L2,texte,dicoPos,niveau=0): def _addBloc(bloc): cle = hash(texte[bloc[0]:bloc[1]]) try: dico[cle].append(bloc[0]) except KeyError: dico[cle] = [bloc[0]] dico['valeur'] += bloc[1] - bloc[0] dico['nbItem'] += 1 diffSym={} # dico[(i,j)] stocke le cout heuristique et la longueur de la liste résultant # de la différence symétrique entre L1[i+1:] et L2[j+1:] L = {1:L1, 2:L2} tabDiffSym = {} dico = {'valeur':0, 'nbItem':0} tabDiffSym[(len(L1),-1)] = {} for i in xrange(len(L1)-1,-1,-1): _addBloc(L1[i]) dico_courant = dico.copy() tabDiffSym[(i,-1)] = dico_courant assert dico['nbItem'] == len(L1) tabDiffSym[(len(L1),-1)] = {'valeur':0, 'nbItem':0} ; bloc = {} for j in xrange(len(L2)-1,-1,-1): bloc[2] = L2[j] for i in xrange(len(L1)-1,-1,-1): bloc[1] = L1[i] diffSymCourante = tabDiffSym[(i+1,-1)] #print diffSymCourante for k in [1,2]: cle = hash(texte[bloc[k][0]:bloc[k][1]]) try: liste_pos_bloc = diffSymCourante[cle] try: liste_pos_bloc.pop() except IndexError: del diffSymCourante[cle] diffSymCourante['valeur'] -= bloc[k][1] - bloc[k][0] diffSymCourante['nbItem'] -= 1 except KeyError: diffSymCourante[cle] = [bloc[k][0]] diffSymCourante['valeur'] += bloc[k][1] - bloc[k][0] diffSymCourante['nbItem'] += 1 tabDiffSym[(i,0)] = diffSymCourante diffSym[(i,j)] = (diffSymCourante['valeur'],diffSymCourante['nbItem']) for i in xrange(len(L1)): tabDiffSym[(i,-1)] = tabDiffSym[(i,0)] del tabDiffSym[(i,0)] dico = tabDiffSym[(len(L1),-1)] _addBloc(L2[j]) tabDiffSym[(len(L1),-1)] = dico assert len(tabDiffSym) == len(L1)+1 assert len(diffSym) == len(L1) * len(L2),str(len(diffSym))+str(len(L1))+str(len(L2)) return diffSym def preTraitDiffSym_(self,L1,L2,texte,dicoPos,niveau=0): """ Précalcul de toutes les différences symétriques possibles. Ainsi chacune est calculée une seule fois et non un nombre combinatoire de fois si on faisait le calcul à la demande pre: forall([texte[x[0]:x[1]] in dicoPos[1].keys() for x in L1]) forall([texte[x[0]:x[1]] in dicoPos[2].keys() for x in L1]) forall([texte[x[0]:x[1]] in dicoPos[1].keys() for x in L2]) forall([texte[x[0]:x[1]] in dicoPos[2].keys() for x in L2]) len(self.tool.difference(L1,L2))>0 """ diffSym={} # dico[(i,j)] stocke le cout heuristique et la longueur de la liste résultant # de la différence symétrique entre L1[i+1:] et L2[j+1:] posB1=0 for B1 in L1: for posB2 in dicoPos[2][texte[B1[0]:B1[1]]]: L=self.difference_symetrique(L1[posB1+1:],L2[posB2+1:],texte,posB1,posB2,dicoPos) cout=0 for B in L: cout += B[1]-B[0] diffSym[(posB1,posB2)]=(cout,len(L)) posB1+=1 assert len(diffSym) <= len(L1) * len(L2),str(len(diffSym))+' '+str(len(L1))+' '+str(len(L2)) return diffSym def difference_symetrique(self,L1,L2,texte,deb1,deb2,dicoPos): """ différence symétrique entre 2 listes de blocs pre: forall([texte[x[0]:x[1]] in dicoPos[1].iterkeys() for x in L1]) forall([texte[x[0]:x[1]] in dicoPos[2].iterkeys() for x in L1]) forall([texte[x[0]:x[1]] in dicoPos[1].iterkeys() for x in L2]) forall([texte[x[0]:x[1]] in dicoPos[2].iterkeys() for x in L2]) len(self.tool.difference(L1,L2))>0 forall(L1, lambda x:texte[x[0]:x[1]] in dicoPos[1].iterkeys()) forall(L1, lambda x:texte[x[0]:x[1]] in dicoPos[2].iterkeys()) forall(L2, lambda x:texte[x[0]:x[1]] in dicoPos[1].iterkeys()) forall(L2, lambda x:texte[x[0]:x[1]] in dicoPos[2].iterkeys()) """ if (len(L1)==0 and len(L2)==0): return [] elif (len(L1)==0): return L2 elif (len(L2)==0): return L1 LRes=[] for B1 in L1: found=False for posB2 in dicoPos[2][texte[B1[0]:B1[1]]]: if posB2>=deb2+1: found=True if not found: LRes.append(B1) for B2 in L2: found=False for posB1 in dicoPos[1][texte[B2[0]:B2[1]]]: if posB1>=deb1+1: found=True if not found: LRes.append(B2) #sys.stderr.write("\ndiff_sym L1="+str(L1)+"\nL2="+str(L2)+"\n="+str(LRes)+"\n") return LRes def _appelAstar(self,L1,L2,texte1,texte2,lt1): """ Alignement A* entre les 2 séquences pre: isinstance(L1,list) and isinstance(L2,list) and isinstance(texte1,str) and isinstance(texte2,str) post: (len(__return__[0])==len(__return__[1])) or (len(__return__[0])==len(__return__[1])+1) or \ (len(__return__[0])+1==len(__return__[1]))""" # dicoPos1 stocke pour chaque bloc de L1 ses positions dans cette liste logging.log(5,"debut _appelAstar") LC1 = [] ; LC2 = [] ; Lcf1 = [] ; Lcf2 = [] ; LH1 = [] ; LH2 = [] for bloc in L1: cle = hash(texte1[bloc[0]:bloc[1]]) LC1.append(cle) Lcf1.append((cle,bloc[1]-bloc[0])) LH1.append((cle, bloc[0], bloc[1])) #print LH1 logging.log(5,'init L1 done') for bloc in L2: cle = hash(texte2[bloc[0]-lt1:bloc[1]-lt1]) LC2.append(cle) Lcf2.append((cle,bloc[1]-bloc[0])) LH2.append((cle, bloc[0], bloc[1])) logging.log(5,'init L2 done') dicoPos = {} coutFixe = {} dicoPos[1], coutFixe[1] = self.calcPosCoutFixe(Lcf1) dicoPos[2], coutFixe[2] = self.calcPosCoutFixe(Lcf2) if __debug__: for cle in dicoPos[1]: assert cle in dicoPos[2], str([texte1[d:f] for d,f in L1])+' '+str([texte2[d-lt1:f-lt1] for d,f in L2]) for cle in dicoPos[2]: assert cle in dicoPos[1] dic = {} ; tri = [] for cle,deb,fin in LH1: try: val,lpos = dic[cle] lpos.append((deb,fin)) dic[cle] = val+1, lpos except KeyError: dic[cle] = 1 , [(deb,fin)] for cle,(nb,lpos) in dic.iteritems(): bisect.insort_right(tri,(nb,lpos)) for nb,lpos in tri[-20:]: deb = lpos[0][0] ; fin =lpos[0][1] logging.debug('LH1: nb = '+str(nb)+' / '+texte1[deb:fin]) dic = {} ; tri = [] for cle,deb,fin in LH2: try: val,lpos = dic[cle] lpos.append((deb,fin)) dic[cle] = val+1, lpos except KeyError: dic[cle] = 1 , [(deb,fin)] for cle,(nb,lpos) in dic.iteritems(): bisect.insort_right(tri,(nb,lpos)) for nb,lpos in tri[-20:]: deb = lpos[0][0] ; fin =lpos[0][1] logging.debug('LH2: nb = '+str(nb)+' / '+texte2[deb-lt1:fin-lt1]) tri = [] for cle,liste in dicoPos[1].iteritems(): bisect.insort_right(tri,len(liste)) logging.debug('tri liste1 = '+str(tri)) logging.debug("longueur liste1 = %d, moyenne = %.2f, mediane = %d",len(tri),+(0.0+sum(tri))/len(tri),tri[len(tri)/2]) tri = [] for cle,liste in dicoPos[2].iteritems(): bisect.insort_right(tri,len(liste)) logging.debug('tri liste2 = '+str(tri)) logging.debug('longueur liste2 = %d, moyenne = %.2f, mediane = %d',len(tri),(0.0+sum(tri))/len(tri),tri[len(tri)/2]) logging.log(5,"fin calcPosCoutFixe") #psyco.bind(self.preTraitDiffSym) diffSym = self.preTraitDiffSym(LH1,LH2,dicoPos) # pré-calcul des différences symétriques #logging.debug(LH1) #logging.debug(LH2) #logging.debug(dicoPos) #diffSym = self.preTraitDiffSymVect(LH1,LH2,dicoPos) # pré-calcul des différences symétriques #trace('diffSym = self.preTraitDiffSymVect(LH1,LH2,dicoPos)',locals()) logging.log(5,"fin preTraitDiffSym") #for cle,val in diffSym.iteritems(): # assert diffSym2.has_key(cle) # assert val == diffSym2[cle],str(val)+'/'+str(cle)+'/'+str(diffSym2[cle]) #for cle,val in diffSym2.iteritems(): # assert diffSym.has_key(cle) # assert val == diffSym[cle],str(val)+'/'+str(cle)+'/'+str(diffSym[cle]) best, closedSet = self.deplacements_pond_Astar(LC1,LC2,diffSym,dicoPos,coutFixe) # A* logging.log(5,"fin deplacements_pond_Astar") dicoBest={1:{}, 2:{}} # dico des positions dans L1 et L2 du meilleur parcours while best != (-1,-1): # on remonte le meilleur parcours trouvé dicoBest[1][best[0]]=1 dicoBest[2][best[1]]=1 best=closedSet[best][1] # noeud père s1=self.__makeLRes(dicoBest[1],L1) s2=self.__makeLRes(dicoBest[2],L2) logging.debug('s1 = '+str(s1)) logging.debug('s2 = '+str(s2)) if __debug__: s11=[] s12=[] for i in xrange(len(s1)): s11.append(s1[i][0]) s12.extend(s1[i][1]) if s11[-1] is None: s11=s11[:-1] s21=[] s22=[] for i in xrange(len(s2)): s21.append(s2[i][0]) s22.extend(s2[i][1]) if s21[-1] is None: s21=s21[:-1] self.ass2__(s11,0,lt1,texte1) self.ass2__(s12,0,lt1,texte1) texte=texte1+texte2 self.ass2__(s21,lt1,len(texte1)+len(texte2),texte) self.ass2__(s22,lt1,len(texte1)+len(texte2),texte) self.ass2__(s11+s21,0,len(texte1)+len(texte2),texte) self.ass2__(s12+s22,0,len(texte1)+len(texte2),texte) del dicoPos, coutFixe, dicoBest logging.log(5,"fin _appelAstar") return s1,s2