# -*- coding: iso-8859-1 -*-
# Copyright 20003 - 2008: Julien Bourdaillet (julien.bourdaillet@lip6.fr), Jean-Gabriel Ganascia (jean-gabriel.ganascia@lip6.fr)
# This file is part of MEDITE.
#
#    MEDITE is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    MEDITE is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Foobar; if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#
# NOTE(review): the year "20003" and the program name "Foobar" in the notice
# above look like a typo and a template leftover; they are reproduced verbatim
# here and should be confirmed with the copyright holders before being edited.
"""Ad-hoc experiments used while developing MEDITE.

The ``testN`` functions below are exploratory scripts that print or log what
they compute; most of them assert nothing.  They rely on Python 2 and on the
legacy ``numpy.oldnumeric`` / ``numpy.numarray`` / ``numarray`` packages.

Every ``print`` uses the single-argument parenthesised form, which produces
the same output as the equivalent Python 2 print statement.
"""

import logging
import os
import os.path
import string

import numpy.oldnumeric as Numeric
import numpy.numarray as numarray
import numpy.numarray.ma as ma


def _readFile(name):
    """Read a file from the test corpus and return its content as a string.

    The file is looked up in ``<cwd>/MediteAppli/test/textes/<name>``.

    NOTE(review): this module used to define ``_readFile`` twice; the first
    definition (same path without the ``textes`` component) was shadowed by
    this one before any caller could run, so only the effective definition
    is kept.

    @param name: file name relative to the ``textes`` directory
    @return: the whole content of the file
    """
    path = os.path.join(os.getcwd(), "MediteAppli", "test", "textes", name)
    f = open(path)
    try:
        return f.read()
    finally:
        # Close the handle even when read() fails.
        f.close()


def test():
    """Log, for each committee member, an excerpt of the papers mentioning them.

    Reads ``comite.txt`` and ``papers.txt`` through _readFile, takes the last
    whitespace-separated word of every committee line and searches it in the
    papers text.  Each hit is logged with about 30 characters of context
    before and 70 after; the number of hits is logged at the end.
    """
    comite = _readFile('comite.txt')
    papers = _readFile('papers.txt')
    lcom = comite.splitlines()
    logging.debug('# comite='+str(len(lcom)))
    # maketrans needs source and target of equal length: each of the three
    # whitespace characters is mapped to a plain space.
    sepTable = string.maketrans("\r\n\t", "   ")
    nbFound = 0
    for c in lcom:
        liste_nom = c.split()
        if not liste_nom:
            # Blank line: there is no name to look up.
            continue
        nom = liste_nom[-1]
        pos = papers.find(nom)
        if pos != -1:
            # Clamp the start: a negative index would wrap around to the end
            # of the text and yield a wrong (usually empty) excerpt.
            p = papers[max(0, pos - 30):pos + 70]
            p = p.translate(sepTable)
            logging.debug(c+' / '+ p +'\n')
            nbFound += 1
    logging.debug('# comite member with paper='+str(nbFound))


def test2():
    """Log the successive base-60 digits of 1/11.

    Stops after 10000 digits or as soon as the remainder reaches zero, then
    logs the number of iterations performed.
    """
    nb = 1.0 / 11
    i = 0
    while i < 10000 and nb > 0:
        nb1 = 60.0 * nb
        ar = int(nb1)    # current base-60 digit
        nb = nb1 - ar    # fractional remainder carried to the next round
        assert 0 <= ar < 60
        logging.debug('nb1=%f / ar=%f / nb=%f', nb1, ar, nb)
        i += 1
    logging.debug('i='+str(i))


def test3():
    """Print the results of a few element-wise numarray operations."""
    import numarray
    a = numarray.zeros(5, numarray.Int8)
    for i in range(len(a)):
        a[i] = i
    b = numarray.ones(5, numarray.Int8)
    c = numarray.zeros(5, numarray.Int8)
    print(a)
    print(b)
    print(a - b)
    print(b - a)
    print(numarray.minimum(a, b))
    # Third argument is the output array: the result overwrites ``a``.
    numarray.logical_and(b - a, c, a)
    print(a)
    a = numarray.array([1, 2, 3, 4])
    print(numarray.sum(a))


def test4():
    """Print the effect of numarray.put and a row-by-row accumulated table."""
    import numarray
    pos = numarray.array([1, 5, 9, 1])
    x = numarray.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    numarray.put(x, pos, [1, 1, 1, 1])
    print(x)
    b = numarray.zeros(10)
    a = numarray.zeros((4, 10))
    print(a)
    # Walk the positions from last to first; ``b`` accumulates one indicator
    # vector per position and row i receives the running total.
    for i in range(3, -1, -1):
        pos2 = pos[i]
        c = numarray.where(numarray.arange(10) == pos2, 1, 0)
        numarray.add(b, c, b)
        a[i] = b
    print(a)


def test5():
    """Print each row of a 2x3 Numeric array."""
    # Local name chosen so it does not shadow the module-level ``ma`` import.
    matrix = Numeric.array([[1, 2, 3], [4, 5, 6]])
    for row in matrix:
        print(row)


def test6():
    """Print numarray ratio filters and the output of extractRemplacements.

    Requires the project module ``alignement`` and the two Claude Bernard
    texts readable through _readFile.
    """
    a1 = [-1, 1, 2, 3, 10, 5]
    b1 = [-1, 10, 3, 2, 1, 6]
    a = numarray.array(a1, type='Float32')
    b = numarray.array(b1)
    c = numarray.where((a / b > 0.5), a, 0)
    c2 = numarray.where((a / b < 2), b, 0)
    c3 = numarray.logical_and(c, c2)
    print('%s %s %s' % (c, c2, c3))
    c4 = numarray.where(c3, a, 0)
    c5 = numarray.where(c3, b, 0)
    c6 = numarray.logical_and(c4, c5)
    print('%s %s %s' % (c4, c5, c6))
    import alignement
    align = alignement.AlignAstarRecur2(
        _readFile('CB_experience.txt') + _readFile("CB_Texte_de_1857.txt"))
    c7, c8 = align.extractRemplacements(a1, b1)
    print('%s %s' % (c7, c8))
    print(numarray.argmax(a1))
    print(numarray.array())
    print(len([]))


def test7():
    """Print whether two equal lists of strings compare equal."""
    a = ['aa', 'bb']
    b = ['aa', 'bb']
    print(a == b)


def test8():
    """Print bisect insertion points on two sample lists."""
    # NOTE(review): ``a`` is in descending order while bisect expects an
    # ascending list - kept as written, presumably to observe that case.
    a = [100, 80, 75, 74, 70, 60, 50, 30, 22, 20, 15, 10, 2]
    b = [2, 5, 10, 12, 13, 19, 20, 30, 50]
    import bisect
    print(bisect.bisect_left(a, 11))
    print(bisect.bisect_right(b, 2))


def __splitNGrammes(texte, tailleNgram, lenT1):
    """Split a text into n-grams starting at several word offsets.

    For tailleNgram == 1 this is exactly __splitNGrammes2.  Otherwise the
    n-grams of the whole text are followed by the n-grams of the text
    starting at its second word and, when tailleNgram is greater than 2, by
    those of the text starting at its third word.  Intermediate word lists
    and offsets are printed as a debugging aid.

    Raises IndexError when the text runs out of words while a word is being
    skipped (for instance on an empty text).

    @param texte: the text
    @param tailleNgram: number of words wanted in a block
    @param lenT1: offset added to every position
    @return: list of (ngram, [start, end]) tuples
    """
    sep = " "   # a "." separator existed as a disabled alternative
    if tailleNgram == 1:
        return __splitNGrammes2(texte, tailleNgram, lenT1)

    r1 = __splitNGrammes2(texte, tailleNgram, lenT1)
    t = texte.split(sep)
    i = 0   # offset in texte of the first character not skipped yet
    print(t)
    # Leading empty strings each stand for one leading separator.
    while len(t[0]) == 0:
        t = t[1:]
        i += 1
    assert len(t[0]) > 0
    # Skip the first word ...
    i += len(t[0])
    t = t[1:]
    print(t)
    # ... then any extra separators, plus the one that ended the word.
    while len(t[0]) == 0:
        t = t[1:]
        i += 1
    i += 1
    print('%s %s' % (t, i))
    r2 = __splitNGrammes2(texte[i:], tailleNgram, lenT1 + i)
    r1.extend(r2)
    if tailleNgram == 2:
        return r1

    # Same skipping again so that the last pass starts at the third word.
    assert len(t[0]) > 0
    i += len(t[0])
    t = t[1:]
    print(t)
    while len(t[0]) == 0:
        t = t[1:]
        i += 1
    i += 1
    print('%s %s' % (t, i))
    r3 = __splitNGrammes2(texte[i:], tailleNgram, lenT1 + i)
    r1.extend(r3)
    return r1


def __splitNGrammes2(texte, tailleNgram, lenT1):
    """Return the blocks of a text as n-grams.

    A block is closed each time tailleNgram separators (spaces) have been
    read, and it includes those separators.  Text remaining after the last
    complete block is not returned; it is only printed.

    @param texte: the text
    @param tailleNgram: number of words wanted in a block
                        (1 => unigram, 2 => bigram, etc.)
    @param lenT1: offset for the positions (0 for text 1, len(text1) for
                  text 2)
    @return: list of (ngram, [start, end]) tuples, positions shifted by lenT1
    """
    sep = " "       # a "." separator existed as a disabled alternative
    res = []        # result list
    prev = 0        # index in texte where the current n-gram starts
    curNgram = 0    # number of words already in the current n-gram
    accu = []       # characters of the current n-gram
    for i, car in enumerate(texte):
        accu.append(car)
        if car == sep:
            # A separator closes one more word of the current n-gram.
            curNgram += 1
            if curNgram == tailleNgram:
                # Requested size reached: emit the n-gram and start afresh.
                ngram = ''.join(accu)
                debut = prev + lenT1
                fin = debut + len(ngram)
                assert ngram == texte[debut-lenT1:fin-lenT1], (ngram, texte[debut-lenT1:fin-lenT1])
                res.append((ngram, [debut, fin]))
                accu = []
                curNgram = 0
                prev = i + 1
    # Leftover characters that did not complete an n-gram.
    print(''.join(accu))
    return res


def test9():
    """Check that splitting on spaces loses exactly one character per split."""
    a = "ar er rt"
    l = a.split(' ')
    print(l)
    assert len(a) == sum([len(x) for x in l])+len(l)-1


def test10():
    """Print 30 integers drawn from a seeded Gaussian distribution."""
    import random
    random.seed(4654)
    # Result unused, but the call advances the generator, so dropping it
    # would change the numbers printed below.
    random.gauss(100, 5)
    liste = []
    for x in range(30):
        liste.append(int(random.gauss(15, 10)))
    print(liste)


if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s',
                        filename=os.path.join(os.getcwd(), 'log.txt'),
                        filemode='w')
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # The handler was created but never registered, so it had no effect;
    # attach it to the root logger so INFO and above also reach the console.
    logging.getLogger('').addHandler(console)
    test10()