/* A software package to align sequences within a specified region. */ /* YAMA3 aligns sequences progressively. In each step, we do the following: Step 1: Divide the region into a few rectangle-like subregions; (To make the divide-and-conquer technique more efficiently.) Step 2: A backward pass is performed to store Score+ on the boundaries, and thus partition the problems into several independent subproblems; Step 3: A forward pass is performed to delimit the subregion and initialize the Score- for Step 4; Step 4: For each delimited subregion, compute the suboptimal DAG by applying the approach proposed in the paper "Computing all suboptimal alignments in linear space" (in a constrained fashion); Step 5: Find a path (alignment) in the DAG with the maximum pattern score. */ #include #include #include "sub.h" #include "AC.h" #define max(x,y) ((x) >= (y) ? (x) : (y)) #define min(x,y) ((x) <= (y) ? (x) : (y)) #define DEFAULT_LL 50 /* output line length */ #define CHAR_SIZE 128 /* Maximum Alphabet Size */ #define SIGMA 8 /* Alphabet Size */ #define SYMA 0 #define SYMC 1 #define SYMG 2 #define SYMT 3 #define SYMN 4 #define DASH 5 #define LSTAR 6 #define RSTAR 7 #define SUB(a,b) PD(i,j,(a),(b)) = PD(i,j,(b),(a)) = \ PD(j,i,(a),(b)) = PD(j,i,(b),(a)) #define SUBI(a,b) PD(i,j,(a),(b)) = PD(j,i,(b),(a)) #define SUBJ(a,b) PD(j,i,(a),(b)) = PD(i,j,(b),(a)) #define DAG(a) PD(i,j,(a),(a)) = PD(j,i,(a),(a)) #define DT(x) PT[i][j]x = PT[j][i]x #define DTI(x) PT[i][j]x #define DTJ(x) PT[j][i]x #define NUMBER 100 /* Maximum number of pieces */ #define ANUMBER 12 /* Maximum number of species */ #define MININT -9999999 #define MAXINT 9999999 #define IPL 25 /* Input Piece Length */ #define TRUE 1 #define FALSE 0 #define DNODE 0 /* Diagonal Node */ #define VNODE 1 /* Vertical Node */ #define HNODE 2 /* Horizontal Node */ #define XNODE 3 /* Wild Type */ #define WIDTH 10 /* Default bandwidth */ #define USAGE "Usage: yama3 [-r ALL_SIM] [-w width] [-c ] [-p ] [-t] [L=??] " /* score file format : seq1 seq2 60 2 8 -10 seq2 seq3 50 2 10 -10 default 60 2 10 -10 seq2 3 2 4 3 default 0 0 0 0 Default : to output LAT-format text -t : normal text output L : text length pattern file format : e.g. YY1 ATGG 16 TBP AATAAA 36 CBF CCAAT 25 silencer TTGGAAG 49 CDP GGTCA 25 input sequence order format: "((seq1[20,50] seq2[30,45]) seq3[20,50])" output scripts: lat header ... l script1 script2 ... first sequence l script1 script2 ... second sequence l script1 script2 ... third sequence ... positive script means # of characters negative script means # of gap symbols (dashes) */ /* char PD(NUMBER+1,NUMBER+1,SIGMA,SIGMA); */ /* symbol distance */ static char *PD_storage; /* symbol distance */ static int K; /* number of pieces */ static int AK; /* number of sequences */ /* Macros to define the scoring matrix */ #define PD_SIZE(K) (sizeof(char) * SIGMA * SIGMA * ((K)+1) * ((K)+1)) #define PD_ALLOC(K) do{PD_storage = (char*)ckalloc(PD_SIZE(K));}while(0) #define PD(a,b,c,d) PD_storage[(d) + SIGMA*((c) + SIGMA*((b) + (AK+1)*(a)))] #define PD2(a,b) (PD_storage + SIGMA*SIGMA*((b) + (AK+1)*(a))) static char PT[ANUMBER+1][ANUMBER+1][2][2][4][4]; /* Altschul gap counts */ static char *S [NUMBER+1]; /* sequences */ static int L [NUMBER+1]; /* lengths of sequences */ static int From[NUMBER+1]; /* Starting position */ static int To[NUMBER+1]; /* End position */ static char *Sname[NUMBER+1]; /* sequence piece name */ static char *ASname[ANUMBER+1]; /* sequence name */ static int StoA[NUMBER+1]; /* piece to sequence */ static char Ename[30]; /* endpoints file name */ static char Rname[30]; /* region file names */ static char tflag = 1; /* lat output format */ static char cflag = 0; /* 0 if score file not specified */ static char eflag = 0; /* 1 if endpoints file specified */ static int LL; /* line length */ char Encode[CHAR_SIZE]; /* A --> SYMA ... */ char Decode[CHAR_SIZE]; /* SYMA --> A ... */ int *Seq_Script[NUMBER+1]; /* aligned sequence script */ int Entry_No[NUMBER+1]; /* Number of script entries */ int *LB, *RB; /* Left and Right Boundaries */ static int lmost, rmost; /* Leftmost and Rightmost of the region */ static int sl; /* total length of the alignment */ static int d_no; /* number of delete operations in ALS */ static int i_no; /* number of insert operations in ALS */ static int r_no; /* number of insert operations in ALS */ static int *sapp; /* Current script append ptr */ static int last; /* Last script op appended */ /* Append "Delete k" op */ #define DEL(k) \ { sl += k; \ if (last < 0) \ last = sapp[-1] -= (k); \ else { \ last = *sapp++ = -(k); \ ++d_no; \ } \ } /* Append "Insert k" op */ #define INS(k) \ { sl += k; \ if (last > 0) \ last = sapp[-1] += (k); \ else { \ last = *sapp++ = (k); \ ++i_no; \ } \ } #define REP { ++sl; last = *sapp++ = 0; ++r_no;} /* Append "Replace" op */ /* - -> * for some cases */ #define ET(ii,jj) ((ii == DASH)? jj : ii) /* compute the value for gap table T */ /* 0 : DASH; 1 : letter; 2 : LSTAR ; 3 : RSTAR */ #define GV(ii) ((ii==DASH)? 0 : ((ii==LSTAR)? 2 : ((ii==RSTAR)? 3 : 1))) #define ETGV(kk,ii,jj) { \ kk = ET(ii,jj); \ kk = GV(kk); \ } #define FORWARD_FIRST_ROW { \ cal2 = AL2[j]; \ c = 0; \ e2 = E2[j]; \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ for (I=0; I<=M1; ++I) c += PD(StoA[L1+I],StoA[L2+J],he1[I],cj); \ } \ \ hg = h + c; \ dg = d + c; \ vg = v + c; \ \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ for (I=0; I<=M1; ++I) { \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][GV(he1[J])][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][cal1[I]!=DASH][pj!=DASH][GV(he1[I])][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][cal1[I]!=DASH][0][GV(he1[I])][cj]; \ } \ } \ \ h = max3(hg,dg,vg); \ d = v = MININT; \ \ pal2 = cal2; \ } #define FORWARD_minJ { \ tal1 = cal1; \ cal1 = pal1; \ pal1 = tal1; \ \ /* The order of the procedure calls is important. */ \ forward_script(EN,LO,SI,M1,L1); \ script2ei(EN,SI,e1,M1,L1); \ script2col(EN,SI,cal1,M1,L1); \ script2hei(EN,SI,he1,M1,L1); \ c = 0; \ ve2 = VE2[minJ]; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ \ hg = LHH1[i-1] + c; \ dg = LDD1[i-1] + c; \ vg = LVV1[i-1] + c; \ \ pal2 = AL2[minJ]; \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pal2[J]!=DASH][ci][GV(ve2[J])]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pal2[J]!=DASH][ci][GV(ve2[J])]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][GV(ve2[J])]; \ } \ } \ \ LVV1[i] = max3(hg,dg,vg); \ LDD1[i] = MININT; \ LHH1[i] = MININT; \ } #define FORWARD_INIT(lj,rj) { \ \ tal1 = cal1; \ cal1 = pal1; \ pal1 = tal1; \ \ /* The order of the procedure calls is important. */ \ forward_script(EN,LO,SI,M1,L1); \ script2ei(EN,SI,e1,M1,L1); \ script2col(EN,SI,cal1,M1,L1); \ script2hei(EN,SI,he1,M1,L1); \ \ lb = max(LB[i],(lj)); \ rb = min(RB[i],(rj)); \ } #define FORWARD_LB(lj) { \ \ /* Initialize the first column */ \ c = 0; \ ve2 = VE2[lb]; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ \ hg = HH[lb] + c; \ dg = DD[lb] + c; \ vg = VV[lb] + c; \ \ \ pal2 = AL2[lb]; \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pal2[J]!=DASH][ci][GV(ve2[J])]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pal2[J]!=DASH][ci][GV(ve2[J])]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][GV(ve2[J])]; \ } \ } \ \ v = max3(hg,dg,vg); \ d = MININT; \ h = MININT; \ \ if (lb > max(LB[i-1],(lj))) { /* diagonal edge should be considered. */ \ cal2 = AL2[lb]; \ pal2 = AL2[lb-1]; \ e2 = E2[lb]; \ \ /* Compute DD affected by the diagonal edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ c += PD(StoA[L1+I],StoA[L2+J],ci,cj); \ } \ } \ \ hg = HH[lb-1] + c; \ dg = DD[lb-1] + c; \ vg = VV[lb-1] + c; \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; \ } \ } \ d = max3(hg,dg,vg); \ } \ \ } #define FORWARD_MOVE { \ \ /* Compute the remaining columns */ \ pal2 = AL2[lb]; \ for (j=lb+1; j<=rb; ++j) { \ cal2 = AL2[j]; \ e2 = E2[j]; \ ve2 = VE2[j]; \ \ /* Compute DD affected by the diagonal edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ c += PD(StoA[L1+I],StoA[L2+J],ci,cj); \ } \ } \ \ hg = HH[j-1] + c; \ dg = DD[j-1] + c; \ vg = VV[j-1] + c; \ \ HH[j-1] = h; \ VV[j-1] = v; \ DD[j-1] = d; \ \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; \ } \ } \ \ d = max3(hg,dg,vg); \ \ /* Compute HH affected by the horizontal edge */ \ c = 0; \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ for (I=0; I<=M1; ++I) c += PD(StoA[L1+I],StoA[L2+J],he1[I],cj); \ } \ \ hg = HH[j-1] + c; \ dg = DD[j-1] + c; \ vg = VV[j-1] + c; \ \ for (I=0; I<=M1; ++I) { \ ci = cal1[I]; \ ei = GV(he1[I]); \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][ci!=DASH][0][ei][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][ci!=DASH][pj!=DASH][ei][cj]; \ } \ } \ \ h = max3(hg,dg,vg); \ \ /* Compute VV affected by the vertical edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ \ hg = HH[j] + c; \ dg = DD[j] + c; \ vg = VV[j] + c; \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ej = GV(ve2[J]); \ cj = cal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][cj!=DASH][ci][ej]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][cj!=DASH][ci][ej]; \ } \ } \ \ v = max3(hg,dg,vg); \ \ pal2 = cal2; \ } \ \ HH[rb] = h; \ VV[rb] = v; \ DD[rb] = d; \ \ } #define BACKWARD_LAST_ROW { \ pal2 = AL2[j]; \ c = 0; \ e2 = E2[j+1]; \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ for (I=0; I<=M1; ++I) c += PD(StoA[L1+I],StoA[L2+J],he1[I],cj); \ } \ d = v = h += c; \ \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ for (I=0; I<=M1; ++I) \ h -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][GV(he1[J])][cj]; \ } \ \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ for (I=0; I<=M1; ++I) \ d -= PT[StoA[L1+I]][StoA[L2+J]][pal1[I]!=DASH][pj!=DASH][GV(he1[I])][cj]; \ } \ \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ for (I=0; I<=M1; ++I) \ v -= PT[StoA[L1+I]][StoA[L2+J]][pal1[I]!=DASH][0][GV(he1[I])][cj]; \ } \ cal2 = pal2; \ } #define BACKWARD_maxJ { \ \ tal1 = cal1; \ cal1 = pal1; \ pal1 = tal1; \ \ /* The order of the procedure calls is important. */ \ script2ei(EN,SI,e1,M1,L1); \ backward_script(EN,LO,SI,M1,L1); \ script2col(EN,SI,pal1,M1,L1); \ script2hei(EN,SI,he1,M1,L1); \ \ pal2 = AL2[maxJ]; \ c = 0; \ ve2 = VE2[maxJ]; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ d = h = v = RRV1[i+1] + c; \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) \ v -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][GV(ve2[J])]; \ } \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) \ d -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pal2[J]!=DASH][ci][GV(ve2[J])]; \ } \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ for (J=0; J<=M2; ++J) \ h -= PT[StoA[L1+I]][StoA[L2+J]][0][pal2[J]!=DASH][ci][GV(ve2[J])]; \ } \ RRH1[i] = h; \ RRD1[i] = d; \ RRV1[i] = v; \ } #define BACKWARD_INIT(lj,rj) { \ \ tal1 = cal1; \ cal1 = pal1; \ pal1 = tal1; \ \ /* The order of the procedure calls is important. */ \ script2ei(EN,SI,e1,M1,L1); \ backward_script(EN,LO,SI,M1,L1); \ script2col(EN,SI,pal1,M1,L1); \ script2hei(EN,SI,he1,M1,L1); \ \ lb = max(LB[i],(lj)); \ rb = min(RB[i],(rj)); \ } #define BACKWARD_RB(rj) { \ /* Initialize the last column */ \ pal2 = AL2[rb]; \ c = 0; \ ve2 = VE2[rb]; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ d = h = v = RV[rb] + c; \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) \ v -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][GV(ve2[J])]; \ } \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) \ d -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pal2[J]!=DASH][ci][GV(ve2[J])]; \ } \ \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ for (J=0; J<=M2; ++J) \ h -= PT[StoA[L1+I]][StoA[L2+J]][0][pal2[J]!=DASH][ci][GV(ve2[J])]; \ } \ \ if (rb < min(RB[i+1],rj)) { /* diagonal edge should be considered. */ \ cal2 = AL2[rb+1]; \ pal2 = AL2[rb]; \ e2 = E2[rb+1]; \ \ /* Compute RD, RH, RV affected by the diagonal edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ c += PD(StoA[L1+I],StoA[L2+J],ci,cj); \ } \ } \ hg = vg = dg = RD[rb+1]; \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; \ } \ } \ \ if (c+hg > h) h = c+hg; \ if (c+vg > v) v = c+vg; \ if (c+dg > d) d = c+dg; \ } \ \ } #define BACKWARD_MOVE { \ /* Compute the remaining columns */ \ cal2 = AL2[rb]; \ for (j=rb-1; j>=lb; --j) { \ pal2 = AL2[j]; \ e2 = E2[j+1]; \ ve2 = VE2[j]; \ \ /* Compute RD, RH, RV affected by the diagonal edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) { \ cj = ET(cal2[J],e2[J]); \ c += PD(StoA[L1+I],StoA[L2+J],ci,cj); \ } \ } \ hg = vg = dg = RD[j+1]; \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; \ } \ } \ \ RH[j+1] = h; \ RV[j+1] = v; \ RD[j+1] = d; \ h = c+hg; \ v = c+vg; \ d = c+dg; \ \ /* Compute RD, RH, RV affected by the horizontal edge */ \ c = 0; \ for (I=0; I<=M2; ++I) { \ cj = ET(cal2[I],e2[I]); \ for (J=0; J<=M1; ++J) c += PD(StoA[L1+J],StoA[L2+I],he1[J],cj); \ } \ hg = vg = dg = RH[j+1]; \ for (I=0; I<=M1; ++I) { \ pi = pal1[I]; \ ei = GV(he1[I]); \ for (J=0; J<=M2; ++J) { \ ETGV(cj,cal2[J],e2[J]) \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ei][cj]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ei][cj]; \ } \ } \ \ if (c+hg > h) h = c+hg; \ if (c+vg > v) v = c+vg; \ if (c+dg > d) d = c+dg; \ \ /* Compute RD, RH, RV affected by the vertical edge */ \ c = 0; \ for (I=0; I<=M1; ++I) { \ ci = ET(cal1[I],e1[I]); \ for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); \ } \ hg = dg = vg = RV[j]; \ for (I=0; I<=M1; ++I) { \ ETGV(ci,cal1[I],e1[I]) \ pi = pal1[I]; \ for (J=0; J<=M2; ++J) { \ ej = GV(ve2[J]); \ pj = pal2[J]; \ hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][ej]; \ vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; \ dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][ej]; \ } \ } \ \ if (c+hg > h) h = c+hg; \ if (c+vg > v) v = c+vg; \ if (c+dg > d) d = c+dg; \ \ cal2 = pal2; \ } \ RH[lb] = h; \ RV[lb] = v; \ RD[lb] = d; \ } #define UPDATE_INDEX(score,i,j) { \ if ((score) >= cut_off) { \ if ((i) < minI) minI = (i); \ if ((i) > maxI) maxI = (i); \ if ((j) < minJ) minJ = (j); \ if ((j) > maxJ) maxJ = (j); \ } \ } static int *C1, *C2; /* Precomputed Column scores for two alignments */ static char **E2; /* End gaps indicator */ static char **VE2; /* End gaps indicator */ /* aligning tree */ typedef struct tnode *Tptr; typedef struct tnode { Tptr left; /* left link */ Tptr right; /* right link */ int L; /* leftmost sequence */ int R; /* rightmost sequence */ char **AL; /* return alignment */ int N; /* alignment length */ } Tnode; char order[NUMBER*IPL]; /* alignment order */ void main(), data(), StoAL(), MALStoMAL(), MDisplay(), LDisplay(), SDisplay(), order_print(), print_score(); void print_F(), C_Construct(), Row_link(), FtoDAG(), DAG_traverse(); void DAG_pat_match(), sort_list(), back_chain(); void output_path(int, int, char, int, int, char); char Consensus(); Tptr build_tree(); int traverse_tree(); void Read_regions(), LR(); int suboptimal(int,int,int,int,int *,int *,int *,int *,int *,int *, int *,int *,int *,int *,int *,int *,char,char); void main(int argc, char *argv[]) { FILE *stream, *ep, *fp, *lav_fp, *ckopen(); int e, m, v; char cname[30], bname[30], pname[30], *sname, fname[30], *get_seqname(); char **AL; int *ALS; char *ckalloc(); int i,j; int c; Tptr tp; int bd,ip, width, from, to; /* gflag = 0; */ strcpy(Rname,"ALL_SIM"); /* default regions */ strcpy(pname,"Pat"); /* default pattern file */ width = WIDTH; /* default width */ LL = DEFAULT_LL; bd = 0; ip = 2; if (argc <= 1) fatal(USAGE); for ( ++argv; --argc; argv++ ) if ( (*argv)[0] == '-' ) { /* optional parameters */ /* if ( (*argv)[1] == 'g' ) { if ( (*argv)[2] == '2' ) gflag = 2; else gflag = 1; } else if ( (*argv)[1] == 't' ) tflag = 0; */ if ( (*argv)[1] == 't' ) tflag = 0; /* text output */ else if ( (*argv)[1] == 'w' ) { /* width is provided */ ++argv; --argc; sscanf(*argv,"%d",&width); } else if ( (*argv)[1] == 'd' ) { /* block distance is provided */ ++argv; --argc; sscanf(*argv,"%d",&bd); ++argv; --argc; sscanf(*argv,"%d",&ip); } else if ( (*argv)[1] == 'c' ) { /* score file is provided */ ++argv; --argc; sscanf(*argv,"%s",cname); score_name(cname); cflag = 1; } else if ( (*argv)[1] == 'r' ) { /* ALL_SIM file is provided */ ++argv; --argc; sscanf(*argv,"%s",Rname); } else if ( (*argv)[1] == 'p' ) { /* The pattern file is provided */ ++argv; --argc; sscanf(*argv,"%s",pname); } else if ( (*argv)[1] == 'e' ) { /* end points and block file are provided */ ++argv; --argc; sscanf(*argv,"%s",Ename); eflag = 1; } else fatal(USAGE); } else if ((*argv)[0] == 'L' && (*argv)[1] == '=') LL = atoi(*argv + 2); else { strcpy(order,*argv); tp = build_tree(order); } /* for (i=1; i<=K; ++i) printf("Seq %d: Length: %d %s\n",i,L[i],S[i]+1); */ /* if (!eflag) fatal("block file is not provided!"); ep = ckopen(Ename,"r"); Read_blocks(ep,bd,ip,Sname,K); fclose(ep); */ /* Read_regions(Rname,width,Sname,K,From,To); */ /* get all alignments */ get_all(Rname,width,Sname,From,To); /* get sequence names, July 13, 1993 */ fp = ckopen(Rname,"r"); AK = 0; while (fscanf(fp,"%s",&fname) != EOF) { lav_fp = ckopen(fname,"r"); lav_read_init(lav_fp); for (j=0; j<2; ++j) { sname = get_seqname(&from, &to, 0); i = 0; while (i=AK) { ASname[i] = sname; ++AK; } } fclose(lav_fp); } fclose(fp); /* Compute StoA */ for (i=1; i<=K; ++i) { j = 0; while (j=AK) fatal("Alignment files are not enough."); } PD_ALLOC(AK); /* allocate space for cost matrices */ if (tflag) { printf("#:lat\n\n"); printf("d {\n \"YAMA3 output with parameters:\n"); } /* print out the aligning order */ order_print(order); /* if (cflag) printf(" score file: %s\n",cname); else printf(" default score file used\n"); */ data(); /* get input scoring scheme */ script_init(); /* initialize the scripts */ Construct(pname); c = traverse_tree(tp); /* traverse the aligning tree */ /* get the align_score */ /* Notice that alignment score returned by traverse_tree may not be the same as align_score. This is beacause some quasi-gaps and end-gaps are not penalized in the alignment score. */ c = align_score(K-1,tp->N); if (tflag) { /* Lat output */ printf(" \"\n"); printf("}\n"); SDisplay(c); /* or lav output LDisplay(tp->AL,K-1,tp->N,c); */ } else { /* texture output */ printf("\n alignment score = %d\n\n",c); script2AL(tp->L, tp->R-tp->L, &tp->AL, tp->N); MDisplay(tp->AL,K-1,tp->N); free(tp->AL[0]); free(tp->AL); } exit(0); } /* order_print - print out the sequence order */ void order_print(char *order) { char *op, buf[80], flag; int LL = 60, IL = 54, i; int line_no=0; flag = TRUE; while (flag) { while (*order == ' ') ++order; i = 0; op = order; if (++line_no > 1) LL = IL; while ((*op++ != '\0') && (i++ < LL)); op--; if (*op == '\0') flag = FALSE; if (i > 0) { while (*op != ']' && *op !=')' && *op != '\0' && i-- > 0) op--; if (i <= 0) { flag = FALSE; if (line_no == 1) printf(" order=%s\n",order); else printf(" %s\n",order); printf("Last line is too long!\n"); } else { strncpy(buf,order,i); buf[i] = '\0'; if (line_no == 1) printf(" order=%s\n",buf); else printf(" %s\n",buf); } } order = ++op; } } /* build_tree - return the root of the aligning tree */ Tptr build_tree(char *order) { Tptr Stack[NUMBER], p; int i,j,k; int sp; char temp[20]; char *ckalloc(); char *tname[NUMBER+1]; /* sequence name */ int tfrom[NUMBER], tto[NUMBER], tK; char *get_seqname(); FILE *fp, *ckopen(); int strsame(); char fname[40]; if (eflag) { /* read end points */ fp = ckopen(Ename,"r"); lav_read_init(fp); tK = 0; while ((tname[tK] = get_seqname(&tfrom[tK], &tto[tK],0))) if (++tK > NUMBER) fatal("The maximum number of sequences is %d",NUMBER); fclose(fp); } sp = 0; K = 0; i = 0; while (order[i] != '\0') { switch (order[i]) { case ')': if (sp < 2) fatal("wrong input format!"); p = (Tptr)ckalloc(sizeof(Tnode)); p->right = Stack[--sp]; p->left = Stack[--sp]; p->L = (p->left)->L; p->R = (p->right)->R; Stack[sp++] = p; case '(': case ']': case ' ': case '\t': ++i; break; default: if (++K > NUMBER) fatal("The maximum number of sequences is %d",NUMBER); j = strcspn(order+i,"( )\0\t"); strncpy(fname,order+i,j); /* printf("fname\n",K,fname[K]); */ i += j; L[K] = get_seq(fname,&S[K],&Sname[K],&From[K],&To[K]); S[K]--; /* printf("K=%d, From=%d, To=%d\n",K,From[K],To[K]); fflush(stdout); */ p = (Tptr)ckalloc(sizeof(Tnode)); p->left = p->right = NULL; p->L = p->R = K; Stack[sp++] = p; break; } } if (sp != 1) fatal("wrong format!"); return Stack[0]; } /* traverse_tree - traverse the aligning tree */ int traverse_tree(Tptr tp) { int *ALS; int SL, i, l, r, n, tb; int D_NO, I_NO, R_NO; int c, bc; int c1, c2; Tptr p,q; char *ckalloc(); p = tp->left; q = tp->right; if (p->L == p->R) { /* one to many */ /* p->AL = (char **) ckalloc((L[p->L]+1)*sizeof(char *)); p->AL[0] = (char *) ckalloc(L[p->L]+1); for (i=1; i<=L[p->L]; ++i) p->AL[i] = (p->AL[i-1])+1; StoAL(S[p->L],L[p->L],p->AL); */ p->N = L[p->L]; c1 = 0; } else c1 = traverse_tree(p); if (q->L == q->R) { /* many to one */ /* q->AL = (char **) ckalloc((L[q->L]+1)*sizeof(char *)); q->AL[0] = (char *) ckalloc(L[q->L]+1); for (i=1; i<=L[q->L]; ++i) q->AL[i] = (q->AL[i-1])+1; StoAL(S[q->L],L[q->L],q->AL); */ q->N = L[q->L]; c2 = 0; } else c2 = traverse_tree(q); /* printf("(%d %d)",tp->L,tp->R); */ ALS = (int *)ckalloc((p->N + q->N +1)*sizeof(int)); LB = (int *) ckalloc((p->N + 1) * sizeof(int)); RB = (int *) ckalloc((p->N + 1) * sizeof(int)); /* for (i=0; i<=p->N; ++i) { LB[i] = ((i*8 <= q->N)? i*8 : q->N); RB[i] = ((LB[i]+100 <= q->N)? LB[i]+100 : q->N); printf("i=%d, %d %d\n",i,LB[i],RB[i]); } RB[p->N] = q->N; */ /* printf("aligning %d %d\n",p->L,q->L); fflush(stdout); */ /* get region boundary lines */ /* lmost and rmost denote the leftmost and rightmost positions of the first alignment that should be aligned. */ LR(p->R - p->L, p->N, p->L, q->R - q->L, q->N, q->L, LB, RB, &lmost, &rmost); /* printf("LR done\n"); printf("lmost=%d, romst=%d, N=%d\n",lmost,rmost,p->N); printf("llb=%d, lrb=%d, rlb=%d, rrb=%d\n",LB[lmost],RB[lmost],LB[rmost],RB[rmost]); for (i=0; i<=p->N; ++i) printf("i = %d, %d %d\n",i,LB[i],RB[i]); fflush(stdout); */ bc = YAMA3(p->L, p->R - p->L, p->N, q->L, q->R - q->L, q->N, ALS, &SL, &D_NO, &I_NO, &R_NO); /* c = YAMA2(p->L, p->R - p->L, p->N, q->L, q->R - q->L, q->N, ALS, &SL, &D_NO, &I_NO, &R_NO); if (c != bc) { printf("p->L=%d, q->L=%d\n",p->L,q->L); printf("c=%d, bc=%d\n",c,bc); } fprintf(stderr,"c=%d,bc=%d\n",c,bc); */ /* printf("yama2 done\n"); printf("SL= %d\n",SL); printf("optimal score= %d\n",c); fflush(stdout); */ ALS2script(ALS, D_NO, I_NO, R_NO, p->L, p->R, q->L, q->R); n = tp->R - tp->L + 1; tp->N = SL; /* tp->AL = (char **)ckalloc((SL+1)*sizeof(char *)); tp->AL[0] = (char *)ckalloc(n*(SL+1)); for (i=1; i<=SL; ++i) tp->AL[i] = tp->AL[i-1] + n; MALStoMAL(p->AL,p->R - p->L, p->N, q->AL, q->R - q->L, q->N, ALS, SL, tp->AL); script2AL(tp->L, tp->R-tp->L, tp->AL); free(p->AL[0]); free(p->AL); free(q->AL[0]); free(q->AL); */ free(ALS); free(LB); free(RB); return bc+c1+c2; } /* script_init - initialize sequence scripts */ int script_init() { int i; char *ckalloc(); for (i=1; i<=K; ++i) { Seq_Script[i] = (int *) ckalloc(sizeof(int)); Seq_Script[i][0] = L[i]; Entry_No[i] = 1; } } /* script2AL - sequence scripts to alignment array */ int script2AL(int l, int m, char ***al, int n) { int i, ci, si, j, k, i1; int c, *sp; char *sq; char **AL, *ckalloc(); /* fprintf(stderr, "Script2AL: l=%d, m=%d\n",l,m); */ /* allocate space for alignment array */ AL = *al = (char **)ckalloc((n+1)*sizeof(char *)); AL[0] = (char *)ckalloc((m+1)*(n+1)); for (i=1; i<=n; ++i) AL[i] = AL[i-1] + (m+1); /* translate sequence scripts to an alignment array */ for (j=0; j<=m; ++j) { AL[0][j] = DASH; ci = 1; si = 1; sp = Seq_Script[l+j]; sq = S[l+j]; for (k=0; k 0) { /* non-gap */ for (i=ci, i1=si; i 0) { /* insertion */ /* printf("op=%d, val=%d\n",op,val); fflush(stdout); */ if (i == OP_NO - 1) { /* last entry */ if (tp[tpi-1] > 0 ) tp[tpi++] = -op; else tp[tpi-1] = tp[tpi-1]-op; } else { if (sp[spi] > 0) { if (val == sp[spi]) { if (spi == 0) tp[tpi++] = -op; else tp[tpi-1] = tp[tpi-1] - op; } else { /* split */ tp[tpi++] = sp[spi] - val; tp[tpi++] = -op; sp[spi] = val; } } else { sp[spi] = sp[spi] - op; } } } else { /* deletion */ /* printf("op=%d, val=%d\n",op,val); fflush(stdout); */ t = -op; while (t > 0) { /* printf("t=%d, val=%d\n",t,val); fflush(stdout); */ if (t >= val) { t = t - val; tp[tpi++] = sp[spi++]; if (spi < en ) val = abs(sp[spi]); } else { val = val - t; t = 0; } } } } /* for (t=0; t 0 ) tp[tpi++] = op; else tp[tpi-1] = tp[tpi-1]+op; } else { if (sp[spi] > 0) { if (val == sp[spi]) { if (spi == 0) tp[tpi++] = op; else tp[tpi-1] = tp[tpi-1] + op; } else { /* split */ tp[tpi++] = sp[spi] - val; tp[tpi++] = op; sp[spi] = val; } } else { sp[spi] = sp[spi] + op; } } } else { /* insertion */ /* printf("op=%d, val=%d\n",op,val); fflush(stdout); */ t = op; while (t > 0) { if (t >= val) { t = t - val; tp[tpi++] = sp[spi++]; if (spi < en ) val = abs(sp[spi]); } else { val = val - t; t = 0; } } } } /* for (t=0; t 0) { col[i] = Encode[S[i1][SI[i]]]; } else { col[i] = DASH; } } } /* script2ei - Script to an end gap indicator */ int script2ei(int *EN, int *SI, char *ei, int num, int l) { int i, i1; for (i=0; i<=num; ++i) { i1 = l+i; if (SI[i] == 0) ei[i] = LSTAR; /* Column 0 */ else if (Seq_Script[i1][EN[i]] > 0) { ei[i] = DASH; } else { if (SI[i] == 1) ei[i] = LSTAR; else if (SI[i] > L[i1]) ei[i] = RSTAR; else ei[i] = DASH; } } } /* script2hei - Script to an end gap indicator (for horizontal move) */ int script2hei(int *EN, int *SI, char *hei, int num, int l) { int i, i1; for (i=0; i<=num; ++i) { i1 = l+i; if (SI[i] == 0) hei[i] = LSTAR; else if (Seq_Script[i1][EN[i]] > 0) { if (SI[i] == L[i1]) hei[i] = RSTAR; else hei[i] = DASH; } else { if (SI[i] == 1) hei[i] = LSTAR; else if (SI[i] > L[i1]) hei[i] = RSTAR; else hei[i] = DASH; } } } /* forward_script - move script one more column */ int forward_script(int *EN, int *LO, int *SI, int num, int l) { int i, i1; for (i=0; i<=num; ++i) { i1 = l+i; if ((Seq_Script[i1][EN[i]] > 0) || (SI[i] == 0)) { ++SI[i]; } if (--LO[i] <= 0) { /* no leftover */ ++EN[i]; LO[i] = abs(Seq_Script[i1][EN[i]]); } } } /* flocate_entry - given a column index, locate the forward script entry */ int flocate_entry(int *EN, int *LO, int *SI, int num, int l, int col_index, int n) { int i, i1; int en, value, lo, si, ci, t; for (i=0; i<=num; ++i) { i1 = l+i; if (col_index == 0) { /* column 0 */ SI[i] = 0; EN[i] = 0; LO[i] = abs(Seq_Script[i1][0])+1; } else { ci = 0; si = 0; en = 0; value = Seq_Script[i1][en]; lo = abs(value); while (ci+lo < col_index) { if (value > 0) si = si+value; ci = ci+lo; value = Seq_Script[i1][++en]; lo = abs(value); } t = col_index-ci; lo = lo-t+1; if (value > 0) si = si+t; else si = si+1; SI[i] = si; EN[i] = en; LO[i] = lo; } /* printf("i=%d, SI[i]=%d, EN[i]=%d, LO[i]=%d\n",i,SI[i],EN[i],LO[i]); */ } } /* backward_script - move script one more column backward */ int backward_script(int *EN, int *LO, int *SI, int num, int l) { int i, i1; for (i=0; i<=num; ++i) { i1 = l+i; if (--LO[i] <= 0) { /* no leftover */ /* printf("en=%d\n",EN[i]); fflush(stdout); */ if (EN[i]>0) { --EN[i]; if (Seq_Script[i1][EN[i]] > 0) { --SI[i]; } } else { SI[i] = 0; /* printf("EN=0, i=%d, num=%d\n",i,num); */ } LO[i] = abs(Seq_Script[i1][EN[i]]); } else { if (Seq_Script[i1][EN[i]] > 0) { --SI[i]; } } } } /* locate_entry - given a column index, locate the script entry */ int locate_entry(int *EN, int *LO, int *SI, int num, int l, int col_index, int n) { int i, i1; int en, value, lo, si, ci, t; for (i=0; i<=num; ++i) { i1 = l+i; if (col_index == 0) { /* column 0 */ SI[i] = 0; EN[i] = 0; LO[i] = 0; } else { ci = n; si = L[i1]+1; en = Entry_No[i1]-1; value = Seq_Script[i1][en]; lo = abs(value); while (ci-lo >= col_index) { if (value > 0) si = si-value; ci = ci-lo; value = Seq_Script[i1][--en]; lo = abs(value); } t = ci - col_index; lo = lo - t; if (value > 0) si = si-t-1; SI[i] = si; EN[i] = en; LO[i] = lo; } /* printf("i=%d, SI[i]=%d\n",i,SI[i]); printf("si=%d, en=%d, lo=%d\n",si,en,lo); fflush(stdout); */ } } #define COST \ { DAG(DASH) = 0; \ DAG(SYMA) = m; \ DAG(SYMC) = m; \ DAG(SYMG) = m; \ DAG(SYMT) = m; \ SUB(DASH,SYMA) = -GE; \ SUB(DASH,SYMC) = -GE; \ SUB(DASH,SYMG) = -GE; \ SUB(DASH,SYMT) = -GE; \ SUB(SYMA,SYMC) = v; \ SUB(SYMA,SYMG) = v; \ SUB(SYMA,SYMT) = v; \ SUB(SYMC,SYMG) = v; \ SUB(SYMC,SYMT) = v; \ SUB(SYMG,SYMT) = v; \ \ DT([0][0][0][0]) = 0; /* -- : -- */ \ DT([0][0][0][1]) = G; /* -- : -x */ \ DT([0][0][1][0]) = G; /* -x : -- */ \ DT([0][0][1][1]) = 0; /* -x : -x */ \ DT([0][1][0][0]) = 0; /* -- : x- */ \ DT([0][1][0][1]) = 0; /* -- : xx */ \ DT([0][1][1][0]) = G; /* -x : x- */ \ DT([0][1][1][1]) = 0; /* -x : xx */ \ DT([1][0][0][0]) = 0; /* x- : -- */ \ DT([1][0][0][1]) = G; /* x- : -x */ \ DT([1][0][1][0]) = 0; /* xx : -- */ \ DT([1][0][1][1]) = 0; /* xx : -x */ \ DT([1][1][0][0]) = 0; /* x- : x- */ \ DT([1][1][0][1]) = G; /* x- : xx */ \ DT([1][1][1][0]) = G; /* xx : x- */ \ DT([1][1][1][1]) = 0; /* xx : xx */ \ \ \ /* Left end gaps */ \ DAG(LSTAR) = 0; \ SUBI(LSTAR,SYMA) = -LGGEI; \ SUBI(LSTAR,SYMC) = -LGGEI; \ SUBI(LSTAR,SYMG) = -LGGEI; \ SUBI(LSTAR,SYMT) = -LGGEI; \ SUBJ(LSTAR,SYMA) = -LGGEJ; \ SUBJ(LSTAR,SYMC) = -LGGEJ; \ SUBJ(LSTAR,SYMG) = -LGGEJ; \ SUBJ(LSTAR,SYMT) = -LGGEJ; \ SUB(LSTAR,DASH) = 0; \ \ DT([0][0][0][2]) = 0; /* -- : -* */ \ DTI([0][0][1][2]) = LGGJ; /* -x : -* */ \ DTJ([0][0][1][2]) = LGGI; /* -x : -* */ \ DT([0][0][2][0]) = 0; /* -* : -- */ \ DTI([0][0][2][1]) = LGGI; /* -* : -x */ \ DTJ([0][0][2][1]) = LGGJ; /* -* : -x */ \ DT([0][0][2][2]) = 0; /* -* : -* */ \ DT([0][1][0][2]) = 0; /* -- : x* */ \ DTI([0][1][1][2]) = LGGJ; /* -x : x* */ \ DTJ([0][1][1][2]) = LGGI; /* -x : x* */ \ DT([0][1][2][0]) = 0; /* -* : x- */ \ DT([0][1][2][1]) = 0; /* -* : xx */ \ DT([0][1][2][2]) = 0; /* -* : x* */ \ DT([1][0][0][2]) = 0; /* x- : -* */ \ DT([1][0][1][2]) = 0; /* xx : -* */ \ DT([1][0][2][0]) = 0; /* x* : -- */ \ DTI([1][0][2][1]) = LGGI; /* x* : -x */ \ DTJ([1][0][2][1]) = LGGJ; /* x* : -x */ \ DT([1][0][2][2]) = 0; /* x* : -* */ \ DT([1][1][0][2]) = 0; /* x- : x* */ \ DTI([1][1][1][2]) = LGGJ; /* xx : x* */ \ DTJ([1][1][1][2]) = LGGI; /* xx : x* */ \ DT([1][1][2][0]) = 0; /* x* : x- */ \ DTI([1][1][2][1]) = LGGI; /* x* : xx */ \ DTJ([1][1][2][1]) = LGGJ; /* x* : xx */ \ DT([1][1][2][2]) = 0; /* x* : x* */ \ \ /* Right end gaps */ \ DAG(RSTAR) = 0; \ SUBI(RSTAR,SYMA) = -RGGEI; \ SUBI(RSTAR,SYMC) = -RGGEI; \ SUBI(RSTAR,SYMG) = -RGGEI; \ SUBI(RSTAR,SYMT) = -RGGEI; \ SUBJ(RSTAR,SYMA) = -RGGEJ; \ SUBJ(RSTAR,SYMC) = -RGGEJ; \ SUBJ(RSTAR,SYMG) = -RGGEJ; \ SUBJ(RSTAR,SYMT) = -RGGEJ; \ SUB(RSTAR,DASH) = 0; \ \ DT([0][0][0][3]) = 0; /* -- : -* */ \ DTI([0][0][1][3]) = RGGJ; /* -x : -* */ \ DTJ([0][0][1][3]) = RGGI; /* -x : -* */ \ DT([0][0][3][0]) = 0; /* -* : -- */ \ DTI([0][0][3][1]) = RGGI; /* -* : -x */ \ DTJ([0][0][3][1]) = RGGJ; /* -* : -x */ \ DT([0][0][3][3]) = 0; /* -* : -* */ \ DT([0][1][0][3]) = 0; /* -- : x* */ \ DTI([0][1][1][3]) = RGGJ; /* -x : x* */ \ DTJ([0][1][1][3]) = RGGI; /* -x : x* */ \ DT([0][1][3][0]) = 0; /* -* : x- */ \ DT([0][1][3][1]) = 0; /* -* : xx */ \ DT([0][1][3][3]) = 0; /* -* : x* */ \ DT([1][0][0][3]) = 0; /* x- : -* */ \ DT([1][0][1][3]) = 0; /* xx : -* */ \ DT([1][0][3][0]) = 0; /* x* : -- */ \ DTI([1][0][3][1]) = RGGI; /* x* : -x */ \ DTJ([1][0][3][1]) = RGGJ; /* x* : -x */ \ DT([1][0][3][3]) = 0; /* x* : -* */ \ DT([1][1][0][3]) = 0; /* x- : x* */ \ DTI([1][1][1][3]) = RGGJ; /* xx : x* */ \ DTJ([1][1][1][3]) = RGGI; /* xx : x* */ \ DT([1][1][3][0]) = 0; /* x* : x- */ \ DTI([1][1][3][1]) = RGGI; /* x* : xx */ \ DTJ([1][1][3][1]) = RGGJ; /* x* : xx */ \ DT([1][1][3][3]) = 0; /* x* : x* */ \ \ SUB(LSTAR,RSTAR) = 0; \ DT([0][0][2][3]) = 0; /* -* : -@ */ \ DT([0][0][3][2]) = 0; \ DT([0][1][2][3]) = 0; \ DT([0][1][3][2]) = 0; \ DT([1][0][2][3]) = 0; \ DT([1][0][3][2]) = 0; \ DT([1][1][2][3]) = 0; \ DT([1][1][3][2]) = 0; \ } /* data : read input scoring scheme */ void data () { auto int i, j; auto int m, v; auto int G, GE; /* gap cost */ auto int LGGI, LGGJ, LGGEI, LGGEJ; /* end gap cost */ auto int RGGI, RGGJ, RGGEI, RGGEJ; /* end gap cost */ auto char first_time; for (i=0; i 0) { ++j; ++k; alk = AL[k]; for (I=0; I<=M1; ++I) alk[I]=DASH; alt = AL2[j]; for (I=0; I<=M2; ++I) alk[M1+1+I]=alt[I]; op--; } else { ++i; ++k; alk = AL[k]; alt = AL1[i]; for (I=0; I<=M1; ++I) alk[I]=alt[I]; for (I=0; I<=M2; ++I) alk[M1+1+I]=DASH; op++; } } } } /* MDisplay - display a multiple alignment */ void MDisplay(char **AL, int M, int N) { int i,j,k; int POS[NUMBER]; for (k=0; k<=M; ++k) POS[k] = From[k+1]; for (i=1; i<=N; i=i+LL) { printf(" "); for (j=i+1; j<=N+1 && j=1 && AL2[j][i]==DASH) { VE2[j][i] = E2[j][i] = RSTAR; --j; } VE2[j][i] = RSTAR; } /* Compute projection cost within AL1 */ /* Compute C1[lmost+1] ... C1[rmost] */ /* c1 = 0; flocate_entry(EN,LO,SI,M1,L1,lmost,N1); script2col(EN,SI,pal1,M1,L1); forward_script(EN,LO,SI,M1,L1); for (i=lmost+1; i<=rmost; ++i) { t = 0; script2col(EN,SI,cal1,M1,L1); script2ei(EN,SI,e1, M1,L1); for (I=0; I 0) { /* starting with some deletions */ DEL(lmost); init_node = VNODE; } else { init_node = XNODE; } end_node = XNODE; LB[lmost] = 0; /* Make sure that it's normalized. */ RB[rmost] = N2; /* Compute an optimal path from init_node at (lmost,0) to end_node at (rmost,N2) */ c = yama2(lmost,0,init_node,rmost,N2,end_node); if (rmost < N1) DEL(N1-rmost); /* finished with some deletions */ *SL = sl; *D_NO = d_no; *I_NO = i_no; *R_NO = r_no; /* free(C1+lmost); free(C2); */ free(RH); free(RV); free(RD); free(HH); free(VV); free(DD); free(E2[0]); free(E2); free(VE2[0]); free(VE2); free(HP); free(VP); free(DP); free(HN); free(VN); free(DN); free(PP[0]+lmost); free(PP[1]+lmost); free(PP[2]+lmost); free(PN[0]+lmost); free(PN[1]+lmost); free(PN[2]+lmost); free(RI+lmost); if (AL1) { free(AL1[0]); free(AL1); } if (AL2) { free(AL2[0]); free(AL2); } free(EN); free(SI); free(LO); free(pal1); free(cal1); free(e1); free(he1); return c; } /* yama2 - return an optimal alignment of two alignments (within a region) */ int yama2(int I1, int J1, char Node1, int I2, int J2, char Node2) { int i, j, I, J; int h, v, d, t, c; int hg, vg, dg; char *cal2, *pal2; char ci, cj, pi, pj, ei, ej; char *e2, *ve2; int lb, rb; int i1, j1, i2, j2, b, b1, b2; int cmid, nmid, pmid; char flag, hflag, vflag, dflag; int hp, vp, dp, tp; char hn, vn, dn, tn, n1, n2; int l_save, r_save; /* printf("I1=%d, J1=%d, Node1=%d, I2=%d, J2=%d, Node2=%d\n",I1,J1,Node1,I2,J2,Node2); printf("i=%d, %d %d\n",i,LB[i],RB[i]); for (i=I1; i<=I2; ++i) if (LB[i]>RB[i]) { printf("Boundary error.\n"); printf("i=%d, %d %d\n",i,LB[i],RB[i]); fflush(stdout); exit(0); } fflush(stdout); */ /* Backward computation */ /* Initialize the last row */ if (Node2 == XNODE) h = v = d = RH[J2] = RV[J2] = RD[J2] = 0; else { h = v = d = RH[J2] = RV[J2] = RD[J2] = MININT; if (Node2 == HNODE) h = RH[J2] = 0; else if (Node2 == VNODE) RV[J2] = 0; else RD[J2] = 0; } RI[I2+J2] = I2; cmid = J2; if (I1 < I2) nmid = (LB[I2-1] + RB[I2-1] + 1)/2; else nmid = J1; /* initialize script pointers */ locate_entry(EN,LO,SI,M1,L1,I2,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); /* he1 = HE1[I2]; */ cal2 = AL2[J2]; for (j=J2-1; j>=LB[I2]; --j) { pal2 = AL2[j]; /* c = C2[j+1]; */ c = 0; e2 = E2[j+1]; for (I=0; I<=M2; ++I) { cj = ET(cal2[I],e2[I]); for (J=0; J<=M1; ++J) c += PD(StoA[L1+J],StoA[L2+I],he1[J],cj); } d = v = h += c; /* for (I=0; I= nmid) { /* (I2,j+1) is a partition point */ HP[j] = VP[j] = DP[j] = I2+j+1; HN[j] = VN[j] = DN[j] = HNODE; } else { HP[j] = VP[j] = DP[j] = HP[j+1]; HN[j] = VN[j] = DN[j] = HN[j+1]; } if (j >= nmid) { /* (I2,j) is a partition point */ b = I2+j; RI[b] = I2; PP[DNODE][b] = PP[VNODE][b] = PP[HNODE][b] = b+1; PN[DNODE][b] = PN[VNODE][b] = PN[HNODE][b] = HNODE; } /* printf("i=%d, j=%d, d= %d, v=%d, h=%d\n",I2,j,d,v,h); */ } for (j=LB[I2]-1; j>=J1; --j) { RH[j] = h+MININT; RD[j] = d+MININT; RV[j] = v+MININT; } /* Compute dynamic programming matrix */ for (i=I2-1; i>=I1; --i) { /* The order of the procedure calls is important. */ script2ei(EN,SI,e1,M1,L1); backward_script(EN,LO,SI,M1,L1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); /* e1 = E1[i+1]; pal1 = AL1[i]; for (I=0; I<=M1; ++I) if (pal1[I] != tt[I]) { printf("L1=%d, M1=%d, i=%d, I=%d\n",L1,M1,i,I); printf("SI=%d, LO=%d, EN=%d, Seq_Script=%d %d\n",SI[I], LO[I], EN[I], Seq_Script[L1+I][EN[I]+1], Seq_Script[L1+I][EN[I]]); printf("pal1=%c, tt=%c\n",pal1[I], tt[I]); printf("AL1[i][0]=%c, AL1[i+1][0]=%c\n",AL1[i][0],AL1[i+1][0]); } he1 = HE1[i]; */ pal2 = AL2[RB[i]]; lb = LB[i]; rb = RB[i]; pmid = cmid; cmid = nmid; if (i > I1) nmid = (LB[i-1]+RB[i-1] + 1)/2; else nmid = J1; /* Initialize the last column */ /* c = C1[i+1]; */ c = 0; ve2 = VE2[rb]; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } d = h = v = RV[rb] + c; /* for (I=0; I=nmid && rb<=cmid) || (rb>cmid && rb<=min(LB[i+1], RB[i]))) { flag = TRUE; b = i+rb; RI[b] = i; } else flag = FALSE; if ((rb>=cmid && rb<=pmid) || (i+1pmid && rb<=min(LB[i+2], RB[i+1]))) { vflag = TRUE; } else vflag = FALSE; if (vflag) { /* (i+1, rb) is a partition point */ hp = vp = dp = i+rb+1; hn = vn = dn = VNODE; } else { hp = vp = dp = VP[rb]; hn = vn = dn = VN[rb]; } if (rb < RB[i+1]) { /* diagonal edge should be considered. */ cal2 = AL2[rb+1]; pal2 = AL2[rb]; e2 = E2[rb+1]; /* Compute RD, RH, RV affected by the diagonal edge */ /* c = C1[i+1] + C2[rb+1]; */ c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hg = vg = dg = RD[rb+1]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } /* for (I=0; I=cmid && rb+1<=pmid) || (i+1pmid && rb+1<=min(LB[i+2], RB[i+1]))) { dflag = TRUE; } else dflag = FALSE; if (dflag) { /* (i+1, rb+1) is a partition point. */ tp = i+rb+2; tn = DNODE; } else { tp = DP[rb+1]; tn = DN[rb+1]; } if (c+hg > h) { h = c+hg; hp = tp; hn = tn; } if (c+vg > v) { v = c+vg; vp = tp; vn = tn; } if (c+dg > d) { d = c+dg; dp = tp; dn = tn; } } if (flag) { /* save information in partition nodes */ PP[DNODE][b] = dp; PN[DNODE][b] = dn; PP[VNODE][b] = vp; PN[VNODE][b] = vn; PP[HNODE][b] = hp; PN[HNODE][b] = hn; } /* Compute the remaining columns */ cal2 = AL2[rb]; for (j=rb-1; j>=lb; --j) { pal2 = AL2[j]; e2 = E2[j+1]; ve2 = VE2[j]; hflag = flag; dflag = vflag; if ((j>=nmid && j<=cmid) || (j>cmid && j<=min(LB[i+1], RB[i]))) { flag = TRUE; b = i+j; RI[b] = i; } else flag = FALSE; if ((j>=cmid && j<=pmid) || (i+1pmid && j<=min(LB[i+2], RB[i+1]))) { vflag = TRUE; } else vflag = FALSE; /* Compute RD, RH, RV affected by the diagonal edge */ /* c = C1[i+1] + C2[j+1]; */ c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hg = vg = dg = RD[j+1]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } /* for (I=0; I h) { h = c+hg; hp = tp; hn = tn; } if (c+vg > v) { v = c+vg; vp = tp; vn = tn; } if (c+dg > d) { d = c+dg; dp = tp; dn = tn; } /* Compute RD, RH, RV affected by the vertical edge */ /* c = C1[i+1]; */ c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } hg = dg = vg = RV[j]; for (I=0; I<=M1; ++I) { /* ci = GV(cal1[I]); */ ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ej = GV(ve2[J]); pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][ej]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][ej]; } } /* for (I=0; I h) { h = c+hg; hp = tp; hn = tn; } if (c+vg > v) { v = c+vg; vp = tp; vn = tn; } if (c+dg > d) { d = c+dg; dp = tp; dn = tn; } if (flag) { /* save information in partition nodes */ PP[DNODE][b] = dp; PN[DNODE][b] = dn; PP[VNODE][b] = vp; PN[VNODE][b] = vn; PP[HNODE][b] = hp; PN[HNODE][b] = hn; } /* printf("i=%d, j=%d, d=%d, h=%d, v=%d\n",i,j,d,h,v); */ cal2 = pal2; } tal1 = cal1; cal1 = pal1; pal1 = tal1; RH[lb] = h; RV[lb] = v; RD[lb] = d; HP[lb] = hp; VP[lb] = vp; DP[lb] = dp; HN[lb] = hn; VN[lb] = vn; DN[lb] = dn; } b = I2 + J2; i1 = I1; j1 = J1; b1 = I1 + J1; if (Node1 == XNODE) { if (d>=h && d>=v) n1 = DNODE; else if (h>=v) n1 = HNODE; else n1 = VNODE; } else n1 = Node1; while (b1 != b) { /* printf("b=%d, b1=%d\n",b,b1); fflush(stdout); */ b2 = PP[n1][b1]; i2 = RI[b2]; j2 = b2 - i2; n2 = PN[n1][b1]; /* printf("i1=%d, j1=%d, n1=%d, i2=%d, j2=%d, n2=%d\n",i1,j1,n1,i2,j2,n2); fflush(stdout); */ if (i1 == i2) { if (j2 > j1) INS(1) } else if (j1 == j2) DEL(1) else if (i1+1 == i2 && j1+1 == j2) { if (n2 == VNODE) { INS(1) DEL(1)} else if (n2 == HNODE) { DEL(1) INS(1) } else REP } else { l_save = LB[i2]; r_save = RB[i2]; if (j1 < (LB[i1]+RB[i1] + 1)/2) { for (i = i2; i > i1; --i) { nmid = (LB[i-1]+RB[i-1] + 1)/2; LB[i] = max(j1, LB[i]); RB[i] = nmid-1; } LB[i1] = j1; RB[i1] = j1; RB[i2] = j2; } else { for (i = i1; i < i2; ++i) { cmid = (LB[i]+RB[i] + 1)/2; LB[i] = max(cmid,LB[i+1])+1; RB[i] = min(j2, RB[i]); } LB[i1] = j1; LB[i2] = j2; RB[i2] = j2; } yama2(i1,j1,n1,i2,j2,n2); LB[i2] = l_save; RB[i2] = r_save; } i1 = i2; j1 = j2; n1 = n2; b1 = b2; } if (Node1 == HNODE) return h; else if (Node1 == DNODE) return d; else if (Node1 == VNODE) return v; else return max3(h,d,v); } /* YAMA3 - initial routine for calling yama3 (aligning within a region) */ int YAMA3(int IL1, int IM1, int IN1, int IL2, int IM2, int IN2, int *ALS, int *SL, int *D_NO, int *I_NO, int *R_NO) { char *pal, *cal, ci, cj, gci, gcj; int i,j,I,J; int t; char *ckalloc(); char *e2; char *ve2; char init_node, end_node; int c, bc, fc; NODE *np, *nq; CHAIN_NODE *cp, *cq; M1 = IM1; N1 = IN1; M2 = IM2; N2 = IN2; L1 = IL1; L2 = IL2; script2AL(L2, M2, &AL2, N2); cal1 = ckalloc(M1+1); pal1 = ckalloc(M1+1); e1 = ckalloc(M1+1); he1 = ckalloc(M1+1); j = (M1+1)*sizeof(int); EN = (int *) ckalloc(j); SI = (int *) ckalloc(j); LO = (int *) ckalloc(j); j = (N2+1) * sizeof(int); RH = (int *) ckalloc(j); RV = (int *) ckalloc(j); RD = (int *) ckalloc(j); HH = (int *) ckalloc(j); VV = (int *) ckalloc(j); DD = (int *) ckalloc(j); /* partition the region into rectangle-like subregions */ CUT = (int *) ckalloc((rmost-lmost+2)*sizeof(int)) - lmost; /* allocate space for partition rows */ j = (rmost-lmost+1) * sizeof(int *); CRH = (int **) ckalloc(j) - lmost; CRV = (int **) ckalloc(j) - lmost; CRD = (int **) ckalloc(j) - lmost; j = (rmost-lmost+1) * sizeof(int); RBRH = (int *) ckalloc(j) - lmost; RBRD = (int *) ckalloc(j) - lmost; RBRV = (int *) ckalloc(j) - lmost; LBHH = (int *) ckalloc(j) - lmost; LBDD = (int *) ckalloc(j) - lmost; LBVV = (int *) ckalloc(j) - lmost; /* allocate space for DAG */ j = (rmost-lmost+1) * sizeof(NODE *); F = (NODE **) ckalloc(j) - lmost; for (i=lmost; i<=rmost; ++i) F[i] = NULL; LastF = (NODE **) ckalloc(j) - lmost; /* pattern matching along the DAG */ Chain = (CHAIN_NODE **) ckalloc((rmost-lmost+1)*sizeof(CHAIN_NODE *)) - lmost; for (i=lmost; i<=rmost; ++i) Chain[i] = NULL; sl = 0; sapp = ALS; last = 0; d_no = 0; i_no = 0; r_no = 0; E2 = (char **) ckalloc((N2+1)*sizeof(char *)); E2[0] = (char *) ckalloc((M2+1)*(N2+1)); VE2 = (char **) ckalloc((N2+1)*sizeof(char *)); VE2[0] = (char *) ckalloc((M2+1)*(N2+1)); for (i=1; i<=N2; ++i) { E2[i] = E2[i-1]+(M2+1); VE2[i] = VE2[i-1]+(M2+1); } /* Compute E2 and VE2 */ for (j=0; j<=N2; ++j) { e2 = E2[j]; ve2 = VE2[j]; for (i=0; i<=M2; ++i) ve2[i] = e2[i] = DASH; } for (i=0; i<=M2; ++i) { j = 0; while (j<=N2 && AL2[j][i]==DASH) { VE2[j][i] = E2[j][i] = LSTAR; ++j; } j = N2; while (j>=1 && AL2[j][i]==DASH) { VE2[j][i] = E2[j][i] = RSTAR; --j; } VE2[j][i] = RSTAR; } if (lmost > 0) { /* starting with some deletions */ DEL(lmost); init_node = VNODE; } else { init_node = XNODE; } end_node = XNODE; LB[lmost] = 0; /* Make sure that it's normalized. */ RB[rmost] = N2; /* printf("lmost=%d, rmost=%d, init_node=%d, end_node=%d\n",lmost,rmost,init_node,end_node); */ divide_region(lmost, rmost); for (i=lmost; i<=rmost; ++i) { if (CUT[i] == 1 || CUT[i+1] == 1) { j = (RB[i]-LB[i]+1) * sizeof(int); CRH[i] = (int *) ckalloc(j) - LB[i]; CRV[i] = (int *) ckalloc(j) - LB[i]; CRD[i] = (int *) ckalloc(j) - LB[i]; } } /* Compute an optimal score from init_node at (lmost,0) to end_node at (rmost,N2) */ bc = backward_divide(lmost,0,init_node,rmost,N2,end_node); cut_off = bc; fc = forward_conquer(lmost,0,init_node,rmost,N2,end_node); if (bc != fc) { printf("lmost=%d, rmost=%d, init_node=%d, end_node=%d\n",lmost,rmost,init_node,end_node); printf("bc=%d, fc=%d\n",bc,fc); } fprintf(stderr,"bc=%d,fc=%d\n",bc,fc); printf("bc=%d,fc=%d\n",bc,fc); /* free CRH, CRV and CRD; Care must be taken because LB might have been modified. */ for (i=lmost; i<=rmost; ++i) { if (CUT[i] == 1 || CUT[i+1] == 1) { free(CRH[i]+LB[i]); free(CRV[i]+LB[i]); free(CRD[i]+LB[i]); } } /* print F */ /* print_F(lmost,rmost); */ FtoDAG(lmost,rmost); /* DAG_traverse(lmost,rmost); */ /* Find all pattern occurrences */ DAG_pat_match(lmost,rmost); /* Find the best path (with the highest pattern score) */ back_chain(lmost,rmost); output_path(lmost,0,init_node,rmost,N2,end_node); if (rmost < N1) DEL(N1-rmost); /* finished with some deletions */ *SL = sl; *D_NO = d_no; *I_NO = i_no; *R_NO = r_no; /* free F */ for (i=lmost; i<=rmost; ++i) { np = F[i]; while (np) { nq = np->link; free(np); np = nq; } } /* free Chain */ for (i=lmost; i<=rmost; ++i) { cp = Chain[i]; while (cp) { cq = cp->link; free(cp); cp = cq; } } free(RH); free(RV); free(RD); free(HH); free(VV); free(DD); free(E2[0]); free(E2); free(VE2[0]); free(VE2); free(AL2[0]); free(AL2); free(EN); free(SI); free(LO); free(pal1); free(cal1); free(e1); free(he1); free(CUT+lmost); free(RBRH+lmost); free(RBRD+lmost); free(RBRV+lmost); free(LBHH+lmost); free(LBDD+lmost); free(LBVV+lmost); free(F+lmost); free(LastF+lmost); free(Chain+lmost); return bc; } /* divide_region - divide the region into several subregions which can be covered by some rectangles whose total area is no more than 2 * region area, and total perimeter is linear to sum of two sequences's length */ int divide_region(int I1, int I2) { int i, i0, sw, w; CUT[I1] = 1; for (i = I1+1; i <= I2; ++i) CUT[i] = 0; i0 = I1; sw = RB[I1] - LB[I1] + 1; for (i = I1+1; i <= I2; ++i) { w = RB[i] - LB[i] + 1; sw += w; if ((RB[i] - LB[i0] + 1) * (i - i0 + 1) > 2 * sw) { CUT[i] = 1; i0 = i; sw = w; /* fprintf(stderr,"cut=%d\n",i); */ } } CUT[I2+1] = 1; } /* forward_conquer - a forward computation to solve each divided subproblems; return an optimal score of two alignments (within a region) */ int forward_conquer(int I1, int J1, char Node1, int I2, int J2, char Node2) { int i, j, I, J, k; int h, v, d, t, c; int oh, ov, od; int hg, vg, dg; char *cal2, *pal2; char ci, cj, pi, pj, ei, ej; char *e2, *ve2; int lb, rb; int *crh, *crd, *crv; int lj, rj; int i1, i2; int *HH1, *DD1, *VV1, *HH2, *DD2, *VV2; char *ckalloc(); /* Forward computation */ /* Initialize the first row */ if (Node1 == XNODE) h = v = d = HH[J1] = VV[J1] = DD[J1] = 0; else { h = v = d = HH[J1] = VV[J1] = DD[J1] = MININT; if (Node1 == HNODE) h = HH[J1] = 0; else if (Node1 == VNODE) v = VV[J1] = 0; else d = DD[J1] = 0; } /* initialize script pointers */ flocate_entry(EN,LO,SI,M1,L1,I1,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[J1]; for (j=J1+1; j<=RB[I1]; ++j) { FORWARD_FIRST_ROW HH[j] = h; DD[j] = d; VV[j] = v; } for (j=RB[I1]+1; j<=J2; ++j) { HH[j] = h+MININT; DD[j] = d+MININT; VV[j] = v+MININT; } /* save LB scores */ LBHH[I1] = HH[J1]; LBDD[I1] = DD[J1]; LBVV[I1] = VV[J1]; /* Compute dynamic programming matrix */ i1 = I1; /* save row i1 */ j = (RB[i1]-LB[i1]+1)*sizeof(int); HH1 = (int *) ckalloc(j) - LB[i1]; DD1 = (int *) ckalloc(j) - LB[i1]; VV1 = (int *) ckalloc(j) - LB[i1]; for (j=LB[i1]; j<=RB[i1]; ++j) { HH1[j] = HH[j]; DD1[j] = DD[j]; VV1[j] = VV[j]; } lj = J1; while (i1 <= I2) { i2 = i1+1; while (CUT[i2] != 1) ++i2; /* partition the region */ /* compute lj */ crh = CRH[i1]; crd = CRD[i1]; crv = CRV[i1]; for (k=max(lj,LB[i1]); k<=RB[i1]; ++k) { t = max3(HH1[k]+crh[k], DD1[k]+crd[k], VV1[k]+crv[k]); if (t>=cut_off) { c = t; lj = k; break; } } if (lj == -1) printf("error: i1=%d, c=%d, lb=%d, rb=%d, lj=%d\n",i1,c,LB[i1],RB[i1],lj); fflush(stdout); for (j=max(lj,LB[i1]); j<=RB[i1]; ++j) { HH[j] = HH1[j]; DD[j] = DD1[j]; VV[j] = VV1[j]; } LBHH[i1] = HH[lj]; LBDD[i1] = DD[lj]; LBVV[i1] = VV[lj]; flocate_entry(EN,LO,SI,M1,L1,i1,N1); script2col(EN,SI,cal1,M1,L1); for (i=i1+1; i=LB[i2-1]; --k) { t = max3(HH[k]+crh[k], DD[k]+crd[k], VV[k]+crv[k]); if (t>=cut_off) { c = t; rj = k; break; } } if (rj == -1) printf("error: i=%d, c=%d, lb=%d, rb=%d, rj=%d\n",i2,c,LB[i2-1],RB[i2-1],rj); fflush(stdout); i = i2; if (i <= I2) { /* Forward to row i2 */ FORWARD_INIT(lj,J2) FORWARD_LB(lj) FORWARD_MOVE /* save LB scores */ LBHH[i] = HH[lb]; LBDD[i] = DD[lb]; LBVV[i] = VV[lb]; /* save row i2 */ j = (RB[i2]-LB[i2]+1)*sizeof(int); HH2 = (int *) ckalloc(j) - LB[i2]; DD2 = (int *) ckalloc(j) - LB[i2]; VV2 = (int *) ckalloc(j) - LB[i2]; for (j=LB[i2]; j<=RB[i2]; ++j) { HH2[j] = HH[j]; DD2[j] = DD[j]; VV2[j] = VV[j]; } } oh = HH[rb]; od = DD[rb]; ov = VV[rb]; /* Modify RBRH, RBRD and RBRV */ RBRH[i2-1] = crh[rj]; RBRD[i2-1] = crd[rj]; RV[rj] = RBRV[i2-1] = crv[rj]; locate_entry(EN,LO,SI,M1,L1,i2-1,N1); script2col(EN,SI,pal1,M1,L1); /* printf("i2-1=%d, h=%d, d=%d, v=%d\n",i2-1,RBRH[i2-1],RBRD[i2-1],RBRV[i2-1]); */ for (i=i2-2; i>=i1; --i) { if (RB[i] < rj) break; BACKWARD_INIT(lj,rj) BACKWARD_RB(rj) RBRH[i] = h; RBRD[i] = d; RV[rj] = RBRV[i] = v; /* printf("i=%d, h=%d, d=%d, v=%d\n",i,h,d,v); */ } /* printf("call: i1=%d, j1=%d, i2=%d, j2=%d\n",i1,lj,i2-1,rj); printf("call: HH=%d %d, DD=%d %d, VV=%d %d\n",HH1[lj],HH1[lj+1],DD1[lj],DD1[lj+1],VV1[lj],VV1[lj+1]); */ suboptimal(i1,lj,i2-1,rj,HH1,DD1,VV1,LBHH,LBDD,LBVV, CRH[i2-1],CRD[i2-1],CRV[i2-1],RBRH,RBRD,RBRV,TRUE,TRUE); free(HH1+LB[i1]); free(DD1+LB[i1]); free(VV1+LB[i1]); HH1 = HH2; DD1 = DD2; VV1 = VV2; i1 = i2; } /* fprintf(stderr,"d=%d, h=%d, v=%d\n",d,h,v); */ if (Node2 == HNODE) return oh; else if (Node2 == DNODE) return od; else if (Node2 == VNODE) return ov; else return max3(oh,od,ov); } /* backward_divide - save Score+ on partition row; return an optimal score of two alignments (within a region) */ int backward_divide(int I1, int J1, char Node1, int I2, int J2, char Node2) { int i, j, I, J; int h, v, d, t, c; int hg, vg, dg; char *cal2, *pal2; char ci, cj, pi, pj, ei, ej; char *e2, *ve2; int lb, rb; int *crh, *crd, *crv; /* Backward computation */ /* Initialize the last row */ if (Node2 == XNODE) h = v = d = RH[J2] = RV[J2] = RD[J2] = 0; else { h = v = d = RH[J2] = RV[J2] = RD[J2] = MININT; if (Node2 == HNODE) h = RH[J2] = 0; else if (Node2 == VNODE) RV[J2] = 0; else RD[J2] = 0; } /* initialize script pointers */ locate_entry(EN,LO,SI,M1,L1,I2,N1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[J2]; for (j=J2-1; j>=LB[I2]; --j) { BACKWARD_LAST_ROW RH[j] = h; RD[j] = d; RV[j] = v; } for (j=LB[I2]-1; j>=J1; --j) { RH[j] = h+MININT; RD[j] = d+MININT; RV[j] = v+MININT; } /* save partition rows */ crh = CRH[I2]; crd = CRD[I2]; crv = CRV[I2]; for (j=LB[I2]; j <= RB[I2]; ++j) { crh[j] = RH[j]; crd[j] = RD[j]; crv[j] = RV[j]; } /* save RB scores */ RBRH[I2] = RH[J2]; RBRD[I2] = RD[J2]; RBRV[I2] = RV[J2]; /* Compute dynamic programming matrix */ for (i=I2-1; i>=I1; --i) { BACKWARD_INIT(J1,J2) BACKWARD_RB(J2) BACKWARD_MOVE /* save partition rows */ if (CUT[i]==1 || CUT[i+1]==1) { crh = CRH[i]; crd = CRD[i]; crv = CRV[i]; for (j=LB[i]; j <= RB[i]; ++j) { crh[j] = RH[j]; crd[j] = RD[j]; crv[j] = RV[j]; } } /* save RB scores */ RBRH[i] = RH[RB[i]]; RBRD[i] = RD[RB[i]]; RBRV[i] = RV[RB[i]]; } if (Node1 == HNODE) return h; else if (Node1 == DNODE) return d; else if (Node1 == VNODE) return v; else return max3(h,d,v); } /* suboptimal - compute all suboptimal pairs in [I1,J1] x [I2,J2] */ /* Initial vectors: THH, TDD, TVV, LHH, LDD, LVV; Initial vectors: BRH, BRD, BRV, RRH, RRD, RRV; Lflag is TRUE if the leftmost column should be output in the current subproblem; Tflag is TRUE if the top of the rectangle should be output in the current subproblem; */ int suboptimal(int I1, int J1, int I2, int J2, int *THH, int *TDD, int *TVV, int *LHH, int *LDD, int *LVV, int *BRH, int *BRD, int *BRV, int *RRH, int *RRD, int *RRV, char Lflag, char Tflag) { int *MIHH, *MIDD, *MIVV, *MJHH, *MJDD, *MJVV; int *MIRH, *MIRD, *MIRV, *MJRH, *MJRD, *MJRV; int *LBS, *RBS, *TS, *BS; int midI, midJ; int lb, rb; int i, j, I, J; int h, d, v, hg, dg, vg, c, tc, ti, tj; int minI, maxI, minJ, maxJ; char *ckalloc(); char *cal2, *pal2; char ci, cj, pi, pj, ei, ej; char *e2, *ve2; int *THH1, *TDD1, *TVV1, *LHH1, *LDD1, *LVV1; int *BRH1, *BRD1, *BRV1, *RRH1, *RRD1, *RRV1; char lf, tf; NODE *np; /* printf("i1=%d, j1=%d, i2=%d, j2=%d, lflag=%d, tflag=%d\n",I1,J1,I2,J2,Lflag,Tflag); printf("LB=%d, RB=%d\n",LB[I1],RB[I1]); */ if (I1+1>=I2 || J1+1>=J2) { if (I1 >= I2) { /* one row */ /* Append DAG vertices */ if (Tflag) { if (Lflag) CHECK_NODE(I1,J1,THH[J1],TDD[J1],TVV[J1],BRH[J1],BRD[J1],BRV[J1]) for (j=J1+1; j<=J2; ++j) CHECK_NODE(I1,j,THH[j],TDD[j],TVV[j],BRH[j],BRD[j],BRV[j]) } } else if (J1 >= J2) { /* one column */ /* Append DAG vertices */ if (Lflag) { if (Tflag) CHECK_NODE(I1,J1,LHH[I1],LDD[I1],LVV[I1],RRH[I1],RRD[I1],RRV[I1]) /* printf("LHH=%d,LDD=%d,LVV=%d,RRH=%d,RRD=%d,RRV=%d\n",LHH[I1],LDD[I1],LVV[I1],RRH[I1],RRD[I1],RRV[I1]); printf("THH=%d,TDD=%d,TVV=%d\n",THH[J1],TDD[J1],TVV[J1]); */ for (i=I1+1; i<=I2; ++i) CHECK_NODE(i,J1,LHH[i],LDD[i],LVV[i],RRH[i],RRD[i],RRV[i]) } } else if (I1+1 == I2) { /* two rows */ /* Allocate temporary storage */ lb = max(J1,LB[I2]); j = (J2-lb+1) * sizeof(int); THH1 = (int *) ckalloc(j) - lb; TDD1 = (int *) ckalloc(j) - lb; TVV1 = (int *) ckalloc(j) - lb; /* Compute THH1, TDD1 and TVV1 */ THH1[lb] = LHH[I2]; TDD1[lb] = LDD[I2]; TVV1[lb] = LVV[I2]; rb = min(J2,RB[I1]); flocate_entry(EN,LO,SI,M1,L1,I1,N1); script2col(EN,SI,pal1,M1,L1); forward_script(EN,LO,SI,M1,L1); script2ei(EN,SI,e1,M1,L1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[lb]; for (j=lb+1; j<=J2; ++j) { cal2 = AL2[j]; e2 = E2[j]; ve2 = VE2[j]; /* Compute TDD1 affected by the diagonal edge */ if (j <= rb+1) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hg = THH[j-1] + c; dg = TDD[j-1] + c; vg = TVV[j-1] + c; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } TDD1[j] = max3(hg,dg,vg); } else TDD1[j] = MININT; /* Compute THH1 affected by the horizontal edge */ c = 0; for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); for (I=0; I<=M1; ++I) c += PD(StoA[L1+I],StoA[L2+J],he1[I],cj); } hg = THH1[j-1] + c; dg = TDD1[j-1] + c; vg = TVV1[j-1] + c; for (I=0; I<=M1; ++I) { ci = cal1[I]; ei = GV(he1[I]); for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][ci!=DASH][0][ei][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][ci!=DASH][pj!=DASH][ei][cj]; } } THH1[j] = max3(hg,dg,vg); /* Compute VV affected by the vertical edge */ if (j <= rb) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } hg = THH[j] + c; dg = TDD[j] + c; vg = TVV[j] + c; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ej = GV(ve2[J]); cj = cal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][cj!=DASH][ci][ej]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][cj!=DASH][ci][ej]; } } TVV1[j] = max3(hg,dg,vg); } else TVV1[j] = MININT; pal2 = cal2; } /* Allocate temporary storage */ rb = min(J2,RB[I1]); j = (rb-J1+1) * sizeof(int); BRH1 = (int *) ckalloc(j) - J1; BRD1 = (int *) ckalloc(j) - J1; BRV1 = (int *) ckalloc(j) - J1; /* Compute BRH1, BRD1 and BRV1 */ BRH1[rb] = RRH[I1]; BRD1[rb] = RRD[I1]; BRV1[rb] = RRV[I1]; lb = max(J1,LB[I2]); locate_entry(EN,LO,SI,M1,L1,I2,N1); script2col(EN,SI,cal1,M1,L1); script2ei(EN,SI,e1,M1,L1); backward_script(EN,LO,SI,M1,L1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[rb]; for (j=rb-1; j>=J1; --j) { pal2 = AL2[j]; e2 = E2[j+1]; ve2 = VE2[j]; /* Compute RD, RH, RV affected by the horizontal edge */ c = 0; for (I=0; I<=M2; ++I) { cj = ET(cal2[I],e2[I]); for (J=0; J<=M1; ++J) c += PD(StoA[L1+J],StoA[L2+I],he1[J],cj); } hg = vg = dg = BRH1[j+1]; for (I=0; I<=M1; ++I) { pi = pal1[I]; ei = GV(he1[I]); for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ei][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ei][cj]; } } h = c+hg; v = c+vg; d = c+dg; /* Compute BRH1, BRD1, BRV1 affected by the diagonal edge */ if (j >= lb-1) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hg = vg = dg = BRD[j+1]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } if (c+hg > h) h = c+hg; if (c+vg > v) v = c+vg; if (c+dg > d) d = c+dg; } /* Compute RD, RH, RV affected by the vertical edge */ if (j >= lb) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } hg = dg = vg = BRV[j]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ej = GV(ve2[J]); pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][ej]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][ej]; } } if (c+hg > h) h = c+hg; if (c+vg > v) v = c+vg; if (c+dg > d) d = c+dg; } cal2 = pal2; BRH1[j] = h; BRD1[j] = d; BRV1[j] = v; } /* Append DAG vertices */ if (Tflag) { if (Lflag) CHECK_NODE(I1,J1,THH[J1],TDD[J1],TVV[J1],BRH1[J1],BRD1[J1],BRV1[J1]) for (j=J1+1; j<=min(J2,RB[I1]); ++j) CHECK_NODE(I1,j,THH[j],TDD[j],TVV[j],BRH1[j],BRD1[j],BRV1[j]) } lb = max(J1,LB[I2]); if (Lflag || lb>J1) CHECK_NODE(I2,lb,THH1[lb],TDD1[lb],TVV1[lb],BRH[lb],BRD[lb],BRV[lb]) for (j=lb+1; j<=J2; ++j) CHECK_NODE(I2,j,THH1[j],TDD1[j],TVV1[j],BRH[j],BRD[j],BRV[j]) lb = max(J1,LB[I2]); free(THH1+lb); free(TDD1+lb); free(TVV1+lb); free(BRH1+J1); free(BRD1+J1); free(BRV1+J1); } else { /* two columns */ j = (I2-I1+1) * sizeof(int); LHH1 = (int *) ckalloc(j) - I1; LDD1 = (int *) ckalloc(j) - I1; LVV1 = (int *) ckalloc(j) - I1; RRH1 = (int *) ckalloc(j) - I1; RRD1 = (int *) ckalloc(j) - I1; RRV1 = (int *) ckalloc(j) - I1; ti = I1; while (tiI1 && LB[ti]>J1) --ti; if (ti == I2) { RRH1[ti] = BRH[J1]; RRD1[ti] = BRD[J1]; RRV1[ti] = BRV[J1]; i = ti-1; } else i = ti; pal2 = AL2[J1]; ve2 = VE2[J1]; cal2 = AL2[J2]; e2 = E2[J2]; locate_entry(EN,LO,SI,M1,L1,i+1,N1); script2col(EN,SI,pal1,M1,L1); while (i>=I1 && RB[i]>J1) { tal1 = cal1; cal1 = pal1; pal1 = tal1; /* The order of the procedure calls is important. */ script2ei(EN,SI,e1,M1,L1); backward_script(EN,LO,SI,M1,L1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); /* Compute RD, RH, RV affected by the diagonal edge */ c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hg = vg = dg = RRD[i+1]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } h = c+hg; v = c+vg; d = c+dg; /* Compute RD, RH, RV affected by the horizontal edge */ c = 0; for (I=0; I<=M2; ++I) { cj = ET(cal2[I],e2[I]); for (J=0; J<=M1; ++J) c += PD(StoA[L1+J],StoA[L2+I],he1[J],cj); } hg = vg = dg = RRH[i]; for (I=0; I<=M1; ++I) { pi = pal1[I]; ei = GV(he1[I]); for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ei][cj]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ei][cj]; } } if (c+hg > h) h = c+hg; if (c+vg > v) v = c+vg; if (c+dg > d) d = c+dg; /* Compute RD, RH, RV affected by the vertical edge */ if (i != ti) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } hg = dg = vg = RRV1[i+1]; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ej = GV(ve2[J]); pj = pal2[J]; hg -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][ej]; vg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; dg -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][ej]; } } if (c+hg > h) h = c+hg; if (c+vg > v) v = c+vg; if (c+dg > d) d = c+dg; } RRH1[i] = h; RRD1[i] = d; RRV1[i] = v; --i; } if (Tflag) { rb = min(J2,RB[I1]); if (rb>J1) { if (Lflag) CHECK_NODE(I1,J1,LHH[I1],LDD[I1],LVV[I1],RRH1[I1],RRD1[I1],RRV1[I1]) CHECK_NODE(I1,J2,LHH1[I1],LDD1[I1],LVV1[I1],RRH[I1],RRD[I1],RRV[I1]) } else { if (Lflag) CHECK_NODE(I1,J1,LHH[I1],LDD[I1],LVV[I1],RRH[I1],RRD[I1],RRV[I1]) } } /* Append DAG vertices */ for (i=I1+1; i<=I2; ++i) { lb = max(J1,LB[i]); rb = min(J2,RB[i]); if (Lflag && lb == J1) { if (rb == J1) CHECK_NODE(i,J1,LHH[i],LDD[i],LVV[i],RRH[i],RRD[i],RRV[i]) else CHECK_NODE(i,J1,LHH[i],LDD[i],LVV[i],RRH1[i],RRD1[i],RRV1[i]) } if (rb == J2) { if (lb == J2) CHECK_NODE(i,J2,LHH[i],LDD[i],LVV[i],RRH[i],RRD[i],RRV[i]) else CHECK_NODE(i,J2,LHH1[i],LDD1[i],LVV1[i],RRH[i],RRD[i],RRV[i]) } } free(LHH1+I1); free(LDD1+I1); free(LVV1+I1); free(RRH1+I1); free(RRD1+I1); free(RRV1+I1); } return; } midI = (I1+I2)/2; midJ = (J1+J2)/2; /* allocate space for the middle row */ lb = max(J1,LB[midI]); rb = min(J2,RB[midI]); j = (rb-lb+1) * sizeof(int); MIHH = (int *) ckalloc(j) - lb; MIDD = (int *) ckalloc(j) - lb; MIVV = (int *) ckalloc(j) - lb; MIRH = (int *) ckalloc(j) - lb; MIRD = (int *) ckalloc(j) - lb; MIRV = (int *) ckalloc(j) - lb; /* allocate space for TS and BS */ rb = min(RB[I1],J2); j = (rb-J1+1) * sizeof(int); TS = (int *) ckalloc(j) - J1; lb = max(LB[I2],J1); j = (J2-lb+1) * sizeof(int); BS = (int *) ckalloc(j) - lb; /* allocate space for the middle column */ j = (I2-I1+1) * sizeof(int); MJHH = (int *) ckalloc(j) - I1; MJDD = (int *) ckalloc(j) - I1; MJVV = (int *) ckalloc(j) - I1; MJRH = (int *) ckalloc(j) - I1; MJRD = (int *) ckalloc(j) - I1; MJRV = (int *) ckalloc(j) - I1; LBS = (int *) ckalloc(j) - I1; RBS = (int *) ckalloc(j) - I1; /* A linear-space forward computation */ /* initialize the first row */ lb = J1; rb = min(J2,RB[I1]); for (j=lb; j<=rb; ++j) { HH[j] = THH[j]; DD[j] = TDD[j]; VV[j] = TVV[j]; } for (j=rb+1; j<=J2; ++j) { HH[j] = DD[j] = VV[j] = MININT; } /* save the middle column */ if (midJ>=lb && midJ<=rb) { MJHH[I1] = HH[midJ]; MJDD[I1] = DD[midJ]; MJVV[I1] = VV[midJ]; } /* compute RBS */ RBS[I1] = max3(THH[rb]+RRH[I1],TDD[rb]+RRD[I1],TVV[rb]+RRV[I1]); flocate_entry(EN,LO,SI,M1,L1,I1,N1); script2col(EN,SI,cal1,M1,L1); for (i=I1+1; i<=I2; ++i) { FORWARD_INIT(J1,J2) /* initialize the first column */ h = LHH[i]; d = LDD[i]; v = LVV[i]; /* compute the remaining columns */ FORWARD_MOVE /* save the middle column */ if (midJ>=lb && midJ<=rb) { MJHH[i] = HH[midJ]; MJDD[i] = DD[midJ]; MJVV[i] = VV[midJ]; } /* save the middle row */ if (i == midI) { for (j=lb; j<=rb; ++j) { MIHH[j] = HH[j]; MIDD[j] = DD[j]; MIVV[j] = VV[j]; } } /* compute RBS */ RBS[i] = max3(HH[rb]+RRH[i],DD[rb]+RRD[i],VV[rb]+RRV[i]); } for (j=lb; j<=J2; ++j) BS[j] = max3(HH[j]+BRH[j],DD[j]+BRD[j],VV[j]+BRV[j]); /* A linear-space backward computation */ /* Initialize the last row */ lb = max(J1,LB[I2]); rb = J2; for (j=rb; j>=lb; --j) { RH[j] = BRH[j]; RD[j] = BRD[j]; RV[j] = BRV[j]; } for (j=lb-1; j>=J1; --j) { RH[j] = MININT; RD[j] = MININT; RV[j] = MININT; } /* save the middle column */ if (midJ>=lb && midJ<=rb) { MJRH[I2] = RH[midJ]; MJRD[I2] = RD[midJ]; MJRV[I2] = RV[midJ]; } /* compute LBS */ LBS[I2] = max3(BRH[lb]+LHH[I2],BRD[lb]+LDD[I2],BRV[lb]+LVV[I2]); locate_entry(EN,LO,SI,M1,L1,I2,N1); script2col(EN,SI,pal1,M1,L1); for (i=I2-1; i>=I1; --i) { BACKWARD_INIT(J1,J2) /* initialize the last column */ h = RRH[i]; d = RRD[i]; v = RRV[i]; /* compute the remaining columns */ BACKWARD_MOVE /* save the middle column */ if (midJ>=lb && midJ<=rb) { MJRH[i] = RH[midJ]; MJRD[i] = RD[midJ]; MJRV[i] = RV[midJ]; } /* save the middle row */ if (i == midI) { for (j=lb; j<=rb; ++j) { MIRH[j] = RH[j]; MIRD[j] = RD[j]; MIRV[j] = RV[j]; } } /* compute LBS */ LBS[i] = max3(RH[lb]+LHH[i],RD[lb]+LDD[i],RV[lb]+LVV[i]); } for (j=J1; j<=rb; ++j) TS[j] = max3(RH[j]+THH[j],RD[j]+TDD[j],RV[j]+TVV[j]); /* printf("i1=%d, j1=%d, i2=%d, j2=%d, midI=%d, midJ=%d, c1=%d, c2=%d\n",I1,J1,I2,J2,midI,midJ,TS[J1],BS[J2]); */ /* Subproblem 1: Shrink [I1,midI] x [J1,midJ] */ minJ = minI = MAXINT; maxJ = maxI = MININT; for (i=I1,lb=max(LB[i],J1); i<=midI && lb<=midJ; ++i,lb=max(LB[i],J1)) { UPDATE_INDEX(LBS[i],i,lb) rb = min(RB[i],J2); if (rb<=midJ) { tc = RBS[i]; tj = rb; } else { tc = max3(MJHH[i]+MJRH[i],MJDD[i]+MJRD[i],MJVV[i]+MJRV[i]); tj = midJ; } UPDATE_INDEX(tc,i,tj) } for (j=J1; j<=min(RB[I1],midJ); ++j) UPDATE_INDEX(TS[j],I1,j) for (j=max(LB[midI],J1); j<=min(RB[midI],midJ); ++j) { tc = max3(MIHH[j]+MIRH[j],MIDD[j]+MIRD[j],MIVV[j]+MIRV[j]); UPDATE_INDEX(tc,midI,j) } if (minI < MAXINT) { if ((minI > I1 && minJ > J1) || (maxI < midI && maxJ < midJ)) printf("error: sub1 i1=%d, j1=%d, i2=%d, j2=%d, minI=%d, maxI=%d, minJ=%d, maxJ=%d\n",I1,J1,I2,J2,minI,maxI,minJ,maxJ); fflush(stdout); rb = min(maxJ,RB[minI]); j = (rb-minJ+1) * sizeof(int); THH1 = (int *) ckalloc(j) - minJ; TDD1 = (int *) ckalloc(j) - minJ; TVV1 = (int *) ckalloc(j) - minJ; lb = max(minJ,LB[maxI]); j = (maxJ-lb+1) * sizeof(int); BRH1 = (int *) ckalloc(j) - lb; BRD1 = (int *) ckalloc(j) - lb; BRV1 = (int *) ckalloc(j) - lb; j = (maxI-minI+1) * sizeof(int); LHH1 = (int *) ckalloc(j) - minI; LDD1 = (int *) ckalloc(j) - minI; LVV1 = (int *) ckalloc(j) - minI; RRH1 = (int *) ckalloc(j) - minI; RRD1 = (int *) ckalloc(j) - minI; RRV1 = (int *) ckalloc(j) - minI; /* Initialize THH1, TDD1 and TVV1 */ if (minI == I1) { for (j=minJ; j<=min(maxJ,RB[minI]); ++j) { THH1[j] = THH[j]; TDD1[j] = TDD[j]; TVV1[j] = TVV[j]; } } else { h = THH1[minJ] = LHH[minI]; d = TDD1[minJ] = LDD[minI]; v = TVV1[minJ] = LVV[minI]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[minJ]; for (j=minJ+1; j<=min(maxJ,RB[minI]); ++j) { FORWARD_FIRST_ROW THH1[j] = h; TDD1[j] = d; TVV1[j] = v; } } /* Initialize BRH1, BRD1 and BRV1 */ if (maxI == midI) { for (j=max(minJ,LB[maxI]); j<=maxJ; ++j) { BRH1[j] = MIRH[j]; BRD1[j] = MIRD[j]; BRV1[j] = MIRV[j]; } } else { if (RB[maxI] < midJ) { h = BRH1[maxJ] = RRH[maxI]; d = BRD1[maxJ] = RRD[maxI]; v = BRV1[maxJ] = RRV[maxI]; /* printf("rb=midJ, RB=%d, midJ=%d\n",RB[maxI],midJ); */ } locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[maxJ]; /* printf("h=%d,d=%d,v=%d\n",h,d,v); */ for (j=maxJ-1; j>=max(minJ,LB[maxI]); --j) { BACKWARD_LAST_ROW BRH1[j] = h; BRD1[j] = d; BRV1[j] = v; /* printf("h=%d,d=%d,v=%d\n",h,d,v); */ } } /* Initialize LHH1, LDD1 and LVV1 */ if (minJ == J1) { for (i=minI; i<=maxI; ++i) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; } } else { LHH1[minI] = THH[minJ]; LDD1[minI] = TDD[minJ]; LVV1[minI] = TVV[minJ]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); i = minI+1; while (i<=maxI && LB[i]<=minJ) { FORWARD_minJ ++i; } while (i<=maxI) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; ++i; } } /* Initialize RRH1, RRD1 and RRV1 */ if (maxJ == midJ) { for (i=maxI; i>=minI; --i) { if (RB[i]>=midJ) { RRH1[i] = MJRH[i]; RRD1[i] = MJRD[i]; RRV1[i] = MJRV[i]; } else { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; } } } else { RRH1[maxI] = MIRH[maxJ]; RRD1[maxI] = MIRD[maxJ]; RRV1[maxI] = MIRV[maxJ]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); i = maxI-1; while (i>=minI && RB[i]>=maxJ) { BACKWARD_maxJ --i; } while (i>=minI) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; --i; } } if (minI > I1) tf = TRUE; else tf = Tflag; if (minJ > J1) lf = TRUE; else lf = Lflag; /* printf("Subproblem1:minI=%d,minJ=%d,maxI=%d,maxJ=%d\n",minI,minJ,maxI,maxJ); */ suboptimal(minI,minJ,maxI,maxJ,THH1,TDD1,TVV1,LHH1,LDD1,LVV1, BRH1,BRD1,BRV1,RRH1,RRD1,RRV1,lf,tf); free(THH1+minJ); free(TDD1+minJ); free(TVV1+minJ); lb = max(minJ,LB[maxI]); free(BRH1+lb); free(BRD1+lb); free(BRV1+lb); free(LHH1+minI); free(LDD1+minI); free(LVV1+minI); free(RRH1+minI); free(RRD1+minI); free(RRV1+minI); } /* Subproblem 2: Shrink [I1,midI] x [midJ,J2] */ minJ = minI = MAXINT; maxJ = maxI = MININT; for (i=midI,rb=min(RB[i],J2); i>=I1 && rb>=midJ; --i,rb=min(RB[i],J2)) { UPDATE_INDEX(RBS[i],i,rb) lb = max(LB[i],J1); if (lb>=midJ) { tc = LBS[i]; tj = lb; } else { tc = max3(MJHH[i]+MJRH[i],MJDD[i]+MJRD[i],MJVV[i]+MJRV[i]); tj = midJ; } UPDATE_INDEX(tc,i,tj) } for (j=midJ; j<=min(RB[I1],J2); ++j) UPDATE_INDEX(TS[j],I1,j) for (j=max(LB[midI],midJ); j<=min(RB[midI],J2); ++j) { tc = max3(MIHH[j]+MIRH[j],MIDD[j]+MIRD[j],MIVV[j]+MIRV[j]); UPDATE_INDEX(tc,midI,j) } if (minI < MAXINT) { if ((minI > I1 && minJ > midJ) || (maxI < midI && maxJ < J2)) printf("error: sub2 i1=%d, j1=%d, i2=%d, j2=%d, minI=%d, maxI=%d, minJ=%d, maxJ=%d\n",I1,J1,I2,J2,minI,maxI,minJ,maxJ); fflush(stdout); rb = min(maxJ,RB[minI]); j = (rb-minJ+1) * sizeof(int); THH1 = (int *) ckalloc(j) - minJ; TDD1 = (int *) ckalloc(j) - minJ; TVV1 = (int *) ckalloc(j) - minJ; lb = max(minJ,LB[maxI]); j = (maxJ-lb+1) * sizeof(int); BRH1 = (int *) ckalloc(j) - lb; BRD1 = (int *) ckalloc(j) - lb; BRV1 = (int *) ckalloc(j) - lb; j = (maxI-minI+1) * sizeof(int); LHH1 = (int *) ckalloc(j) - minI; LDD1 = (int *) ckalloc(j) - minI; LVV1 = (int *) ckalloc(j) - minI; RRH1 = (int *) ckalloc(j) - minI; RRD1 = (int *) ckalloc(j) - minI; RRV1 = (int *) ckalloc(j) - minI; /* Initialize THH1, TDD1 and TVV1 */ if (minI == I1) { for (j=minJ; j<=min(maxJ,RB[minI]); ++j) { THH1[j] = THH[j]; TDD1[j] = TDD[j]; TVV1[j] = TVV[j]; } } else { if (LB[minI] >= midJ) { h = THH1[minJ] = LHH[minI]; d = TDD1[minJ] = LDD[minI]; v = TVV1[minJ] = LVV[minI]; } else { h = THH1[minJ] = MJHH[minI]; d = TDD1[minJ] = MJDD[minI]; v = TVV1[minJ] = MJVV[minI]; } flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[minJ]; for (j=minJ+1; j<=min(maxJ,RB[minI]); ++j) { FORWARD_FIRST_ROW THH1[j] = h; TDD1[j] = d; TVV1[j] = v; } } /* Initialize BRH1, BRD1 and BRV1 */ if (maxI == midI) { for (j=max(minJ,LB[maxI]); j<=maxJ; ++j) { BRH1[j] = MIRH[j]; BRD1[j] = MIRD[j]; BRV1[j] = MIRV[j]; } } else { h = BRH1[maxJ] = RRH[maxI]; d = BRD1[maxJ] = RRD[maxI]; v = BRV1[maxJ] = RRV[maxI]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[maxJ]; for (j=maxJ-1; j>=max(minJ,LB[maxI]); --j) { BACKWARD_LAST_ROW BRH1[j] = h; BRD1[j] = d; BRV1[j] = v; } } /* Initialize LHH1, LDD1 and LVV1 */ if (minJ == midJ) { for (i=minI; i<=maxI; ++i) { if (LB[i] >= midJ) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; } else { LHH1[i] = MJHH[i]; LDD1[i] = MJDD[i]; LVV1[i] = MJVV[i]; } } } else { LHH1[minI] = THH[minJ]; LDD1[minI] = TDD[minJ]; LVV1[minI] = TVV[minJ]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); i = minI+1; while (i<=maxI && LB[i]<=minJ) { FORWARD_minJ ++i; } while (i<=maxI) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; ++i; } } /* Initialize RRH1, RRD1 and RRV1 */ if (maxJ == J2) { for (i=maxI; i>=minI; --i) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; } } else { RRH1[maxI] = MIRH[maxJ]; RRD1[maxI] = MIRD[maxJ]; RRV1[maxI] = MIRV[maxJ]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); i = maxI-1; while (i>=minI && RB[i]>=maxJ) { BACKWARD_maxJ --i; } while (i>=minI) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; --i; } } if (minI > I1) tf = TRUE; else tf = Tflag; if (minJ > midJ) lf = TRUE; else lf = FALSE; /* printf("Subproblem2:minI=%d,minJ=%d,maxI=%d,maxJ=%d\n",minI,minJ,maxI,maxJ); */ suboptimal(minI,minJ,maxI,maxJ,THH1,TDD1,TVV1,LHH1,LDD1,LVV1, BRH1,BRD1,BRV1,RRH1,RRD1,RRV1,lf,tf); free(THH1+minJ); free(TDD1+minJ); free(TVV1+minJ); lb = max(minJ,LB[maxI]); free(BRH1+lb); free(BRD1+lb); free(BRV1+lb); free(LHH1+minI); free(LDD1+minI); free(LVV1+minI); free(RRH1+minI); free(RRD1+minI); free(RRV1+minI); } /* Subproblem 3: Shrink [midI,I2] x [J1,midJ] */ minJ = minI = MAXINT; maxJ = maxI = MININT; for (i=midI,lb=max(LB[i],J1); i<=I2 && lb<=midJ; ++i,lb=max(LB[i],J1)) { UPDATE_INDEX(LBS[i],i,lb) rb = min(RB[i],J2); if (rb<=midJ) { tc = RBS[i]; tj = rb; } else { tc = max3(MJHH[i]+MJRH[i],MJDD[i]+MJRD[i],MJVV[i]+MJRV[i]); tj = midJ; } UPDATE_INDEX(tc,i,tj) } for (j=max(LB[midI],J1); j<=min(RB[midI],midJ); ++j) { tc = max3(MIHH[j]+MIRH[j],MIDD[j]+MIRD[j],MIVV[j]+MIRV[j]); UPDATE_INDEX(tc,midI,j) } for (j=max(J1,LB[I2]); j<=min(RB[midI],midJ); ++j) { UPDATE_INDEX(BS[j],I2,j) } if (minI < MAXINT) { if ((minI > midI && minJ > J1) || (maxI < I2 && maxJ < midJ)) printf("error: sub1 i1=%d, j1=%d, i2=%d, j2=%d, minI=%d, maxI=%d, minJ=%d, maxJ=%d\n",I1,J1,I2,J2,minI,maxI,minJ,maxJ); fflush(stdout); rb = min(maxJ,RB[minI]); j = (rb-minJ+1) * sizeof(int); THH1 = (int *) ckalloc(j) - minJ; TDD1 = (int *) ckalloc(j) - minJ; TVV1 = (int *) ckalloc(j) - minJ; lb = max(minJ,LB[maxI]); j = (maxJ-lb+1) * sizeof(int); BRH1 = (int *) ckalloc(j) - lb; BRD1 = (int *) ckalloc(j) - lb; BRV1 = (int *) ckalloc(j) - lb; j = (maxI-minI+1) * sizeof(int); LHH1 = (int *) ckalloc(j) - minI; LDD1 = (int *) ckalloc(j) - minI; LVV1 = (int *) ckalloc(j) - minI; RRH1 = (int *) ckalloc(j) - minI; RRD1 = (int *) ckalloc(j) - minI; RRV1 = (int *) ckalloc(j) - minI; /* Initialize THH1, TDD1 and TVV1 */ if (minI == midI) { for (j=minJ; j<=min(maxJ,RB[minI]); ++j) { THH1[j] = MIHH[j]; TDD1[j] = MIDD[j]; TVV1[j] = MIVV[j]; } } else { h = THH1[minJ] = LHH[minI]; d = TDD1[minJ] = LDD[minI]; v = TVV1[minJ] = LVV[minI]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[minJ]; for (j=minJ+1; j<=min(maxJ,RB[minI]); ++j) { FORWARD_FIRST_ROW THH1[j] = h; TDD1[j] = d; TVV1[j] = v; } } /* Initialize BRH1, BRD1 and BRV1 */ if (maxI == I2) { for (j=max(minJ,LB[maxI]); j<=maxJ; ++j) { BRH1[j] = BRH[j]; BRD1[j] = BRD[j]; BRV1[j] = BRV[j]; } } else { if (RB[maxI] < midJ) { h = BRH1[maxJ] = RRH[maxI]; d = BRD1[maxJ] = RRD[maxI]; v = BRV1[maxJ] = RRV[maxI]; /* printf("rb=midJ, RB=%d, midJ=%d\n",RB[maxI],midJ); */ } locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[maxJ]; /* printf("h=%d,d=%d,v=%d\n",h,d,v); */ for (j=maxJ-1; j>=max(minJ,LB[maxI]); --j) { BACKWARD_LAST_ROW BRH1[j] = h; BRD1[j] = d; BRV1[j] = v; /* printf("h=%d,d=%d,v=%d\n",h,d,v); */ } } /* Initialize LHH1, LDD1 and LVV1 */ if (minJ == J1) { for (i=minI; i<=maxI; ++i) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; } } else { LHH1[minI] = MIHH[minJ]; LDD1[minI] = MIDD[minJ]; LVV1[minI] = MIVV[minJ]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); i = minI+1; while (i<=maxI && LB[i]<=minJ) { FORWARD_minJ ++i; } while (i<=maxI) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; ++i; } } /* Initialize RRH1, RRD1 and RRV1 */ if (maxJ == midJ) { for (i=maxI; i>=minI; --i) { if (RB[i]>=midJ) { RRH1[i] = MJRH[i]; RRD1[i] = MJRD[i]; RRV1[i] = MJRV[i]; } else { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; } } } else { RRH1[maxI] = BRH[maxJ]; RRD1[maxI] = BRD[maxJ]; RRV1[maxI] = BRV[maxJ]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); i = maxI-1; while (i>=minI && RB[i]>=maxJ) { BACKWARD_maxJ --i; } while (i>=minI) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; --i; } } if (minI > midI) tf = TRUE; else tf = FALSE; if (minJ > J1) lf = TRUE; else lf = Lflag; /* printf("Subproblem3:minI=%d,minJ=%d,maxI=%d,maxJ=%d\n",minI,minJ,maxI,maxJ); */ suboptimal(minI,minJ,maxI,maxJ,THH1,TDD1,TVV1,LHH1,LDD1,LVV1, BRH1,BRD1,BRV1,RRH1,RRD1,RRV1,lf,tf); free(THH1+minJ); free(TDD1+minJ); free(TVV1+minJ); lb = max(minJ,LB[maxI]); free(BRH1+lb); free(BRD1+lb); free(BRV1+lb); free(LHH1+minI); free(LDD1+minI); free(LVV1+minI); free(RRH1+minI); free(RRD1+minI); free(RRV1+minI); } /* Subproblem 4: Shrink [midI,I2] x [midJ,J2] */ minJ = minI = MAXINT; maxJ = maxI = MININT; for (i=I2,rb=min(RB[i],J2); i>=midI && rb>=midJ; --i,rb=min(RB[i],J2)) { UPDATE_INDEX(RBS[i],i,rb) lb = max(LB[i],J1); if (lb>=midJ) { tc = LBS[i]; tj = lb; } else { tc = max3(MJHH[i]+MJRH[i],MJDD[i]+MJRD[i],MJVV[i]+MJRV[i]); tj = midJ; } UPDATE_INDEX(tc,i,tj) } for (j=max(LB[midI],midJ); j<=min(RB[midI],J2); ++j) { tc = max3(MIHH[j]+MIRH[j],MIDD[j]+MIRD[j],MIVV[j]+MIRV[j]); UPDATE_INDEX(tc,midI,j) } for (j=max(midJ,LB[I2]); j<=min(RB[I2],J2); ++j) UPDATE_INDEX(BS[j],I2,j) if (minI < MAXINT) { if ((minI > midI && minJ > midJ) || (maxI < I2 && maxJ < J2)) printf("error: sub2 i1=%d, j1=%d, i2=%d, j2=%d, minI=%d, maxI=%d, minJ=%d, maxJ=%d\n",I1,J1,I2,J2,minI,maxI,minJ,maxJ); fflush(stdout); rb = min(maxJ,RB[minI]); j = (rb-minJ+1) * sizeof(int); THH1 = (int *) ckalloc(j) - minJ; TDD1 = (int *) ckalloc(j) - minJ; TVV1 = (int *) ckalloc(j) - minJ; lb = max(minJ,LB[maxI]); j = (maxJ-lb+1) * sizeof(int); BRH1 = (int *) ckalloc(j) - lb; BRD1 = (int *) ckalloc(j) - lb; BRV1 = (int *) ckalloc(j) - lb; j = (maxI-minI+1) * sizeof(int); LHH1 = (int *) ckalloc(j) - minI; LDD1 = (int *) ckalloc(j) - minI; LVV1 = (int *) ckalloc(j) - minI; RRH1 = (int *) ckalloc(j) - minI; RRD1 = (int *) ckalloc(j) - minI; RRV1 = (int *) ckalloc(j) - minI; /* Initialize THH1, TDD1 and TVV1 */ if (minI == midI) { for (j=minJ; j<=min(maxJ,RB[minI]); ++j) { THH1[j] = MIHH[j]; TDD1[j] = MIDD[j]; TVV1[j] = MIVV[j]; } } else { if (LB[minI] >= midJ) { h = THH1[minJ] = LHH[minI]; d = TDD1[minJ] = LDD[minI]; v = TVV1[minJ] = LVV[minI]; } else { h = THH1[minJ] = MJHH[minI]; d = TDD1[minJ] = MJDD[minI]; v = TVV1[minJ] = MJVV[minI]; } flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); script2hei(EN,SI,he1,M1,L1); pal2 = AL2[minJ]; for (j=minJ+1; j<=min(maxJ,RB[minI]); ++j) { FORWARD_FIRST_ROW THH1[j] = h; TDD1[j] = d; TVV1[j] = v; } } /* Initialize BRH1, BRD1 and BRV1 */ if (maxI == I2) { for (j=max(minJ,LB[maxI]); j<=maxJ; ++j) { BRH1[j] = BRH[j]; BRD1[j] = BRD[j]; BRV1[j] = BRV[j]; } } else { h = BRH1[maxJ] = RRH[maxI]; d = BRD1[maxJ] = RRD[maxI]; v = BRV1[maxJ] = RRV[maxI]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cal2 = AL2[maxJ]; for (j=maxJ-1; j>=max(minJ,LB[maxI]); --j) { BACKWARD_LAST_ROW BRH1[j] = h; BRD1[j] = d; BRV1[j] = v; } } /* Initialize LHH1, LDD1 and LVV1 */ if (minJ == midJ) { for (i=minI; i<=maxI; ++i) { if (LB[i] >= midJ) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; } else { LHH1[i] = MJHH[i]; LDD1[i] = MJDD[i]; LVV1[i] = MJVV[i]; } } } else { LHH1[minI] = MIHH[minJ]; LDD1[minI] = MIDD[minJ]; LVV1[minI] = MIVV[minJ]; flocate_entry(EN,LO,SI,M1,L1,minI,N1); script2col(EN,SI,cal1,M1,L1); i = minI+1; while (i<=maxI && LB[i]<=minJ) { FORWARD_minJ ++i; } while (i<=maxI) { LHH1[i] = LHH[i]; LDD1[i] = LDD[i]; LVV1[i] = LVV[i]; ++i; } } /* Initialize RRH1, RRD1 and RRV1 */ if (maxJ == J2) { for (i=maxI; i>=minI; --i) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; } } else { RRH1[maxI] = BRH[maxJ]; RRD1[maxI] = BRD[maxJ]; RRV1[maxI] = BRV[maxJ]; locate_entry(EN,LO,SI,M1,L1,maxI,N1); script2col(EN,SI,pal1,M1,L1); i = maxI-1; while (i>=minI && RB[i]>=maxJ) { BACKWARD_maxJ --i; } while (i>=minI) { RRH1[i] = RRH[i]; RRD1[i] = RRD[i]; RRV1[i] = RRV[i]; --i; } } if (minI > midI) tf = TRUE; else tf = FALSE; if (minJ > midJ) lf = TRUE; else lf = FALSE; /* printf("Subproblem4:minI=%d,minJ=%d,maxI=%d,maxJ=%d\n",minI,minJ,maxI,maxJ); */ suboptimal(minI,minJ,maxI,maxJ,THH1,TDD1,TVV1,LHH1,LDD1,LVV1, BRH1,BRD1,BRV1,RRH1,RRD1,RRV1,lf,tf); free(THH1+minJ); free(TDD1+minJ); free(TVV1+minJ); lb = max(minJ,LB[maxI]); free(BRH1+lb); free(BRD1+lb); free(BRV1+lb); free(LHH1+minI); free(LDD1+minI); free(LVV1+minI); free(RRH1+minI); free(RRD1+minI); free(RRV1+minI); } /* free local arrays */ lb = max(J1,LB[midI]); free(MIHH+lb); free(MIDD+lb); free(MIVV+lb); free(MIRH+lb); free(MIRD+lb); free(MIRV+lb); free(MJHH+I1); free(MJDD+I1); free(MJVV+I1); free(MJRH+I1); free(MJRD+I1); free(MJRV+I1); free(LBS+I1); free(RBS+I1); lb = max(LB[I2],J1); free(BS+lb); free(TS+J1); } /* align_score - return the score of the alignment AL */ int align_score(int M, int N) { int I, J, i, j; int score; char *cal, *pal, *tal; char ci, cj, pi, ti, tj; char *e1; int *EN, *SI, *LO; char *ckalloc(); cal = ckalloc(M+1); pal = ckalloc(M+1); e1 = ckalloc(M+1); j = (M1+1)*sizeof(int); EN = (int *) ckalloc(j); SI = (int *) ckalloc(j); LO = (int *) ckalloc(j); for (I=0; I<=M; ++I) { EN[I] = 0; SI[I] = 1; LO[I] = abs(Seq_Script[1+I][0]); pal[I] = DASH; } score = 0; for (j=1; j<=N; ++j) { script2col(EN,SI,cal,M,1); script2ei(EN,SI,e1, M,1); for (I=0; I=1 && AL[j][i]==DASH) { E1[j][i] = RSTAR; --j; } } score = 0; pal = AL[0]; for (j=1; j<=N; ++j) { e1 = E1[j]; cal = AL[j]; for (I=0; Ij,np->h,np->d,np->v,np->h1,np->d1,np->v1); np = np->link; } } } /* C_Construct - construct a row */ void C_Construct(int row) { NODE *p, *q; p = F[row]; while (p && (q = p->link)) { if (q->h + q->h1 >= cut_off) p->hlink = q; p = q; } } /* Row_link - link the current row to the previous row */ void Row_link(int row) { NODE *p, *q; p = F[row]; q = F[row-1]; while (p) { while (q && q->j < p->j -1) q = q->link; if (q && q->j == p->j - 1) { if (p->d + p->d1 >= cut_off) q->dlink = p; q = q->link; } if (q && q->j == p->j) { if (p->v + p->v1 >= cut_off) q->vlink = p; } p = p->link; } } /* FtoDAG - Transform F to a DAG */ void FtoDAG(int I1, int I2) { int i; /* Construct row si */ C_Construct(I1); for (i=I1+1; i<=I2; ++i) { C_Construct(i); Row_link(i); } } /* DAG_traverse - traverse the constructed suboptimal DAG */ void DAG_traverse(int I1, int I2) { int i; NODE *np; for (i=I1; i<=I2; ++i) { printf("****** row %d ******\n",i); np = F[i]; if (!np) printf("check column i=%d\n",i); while (np) { printf("%d",np->j); if (np->hlink) printf(" hlink"); if (np->dlink) printf(" dlink"); if (np->vlink) printf(" vlink"); printf("\n"); if (!np->hlink && !np->dlink && !np->vlink) printf("(%d %d) is a dead end.\n",i,np->j); np = np->link; } } } /* DAG_pat_match - find all pattern occurrences in a DAG */ void DAG_pat_match(int I1, int I2) { int i, j, state, ti; char *ckalloc(); NODE *np, *np1; SOUT *sp; CHAIN_NODE *cp; char c; char *cal2, *e2; flocate_entry(EN,LO,SI,M1,L1,I1,N1); for (i=I1; idlink) { j = np1->j; cal2 = AL2[j]; e2 = E2[j]; if ((c = Consensus(cal1,e1,cal2,e2)) != SYMN) { /* a possible state transition */ state = np->state; while (gf[state][c] == FAIL) state = fs[state]; state = gf[state][c]; /* save the state in grid point np1 */ np1->state = state; if (s_output[state]) { /* pattern found */ sp = s_output[state]; /* printf("In position %d %d:",i+1,np1->j); */ for (sp=s_output[state]; sp; sp=sp->link) { /* add the pattern to the chain */ /* printf(" %d", sp->pat_no); */ cp = (CHAIN_NODE *) ckalloc(sizeof(CHAIN_NODE)); cp->pat_no = sp->pat_no; cp->j = np1->j - pat_len[sp->pat_no]; cp->np = np1; ti = i + 1 - pat_len[sp->pat_no]; cp->link = Chain[ti]; Chain[ti] = cp; } /* printf("\n"); */ } } } np = np->link; } } /* sort the Chain for each row except the last one */ for (i=I1; ilink) { for (cp1=cp->link; cp1; cp1=cp1->link) { if (cp1->j > cp->j) { /* swap */ j = cp->j; pat_no = cp->pat_no; np = cp->np; cp->j = cp1->j; cp->pat_no = cp1->pat_no; cp->np = cp1->np; cp1->j = j; cp1->pat_no = pat_no; cp1->np = np; } } } } /* Consensus - return the consensus of the column. return SYMN if there is no agreement */ char Consensus(char *c1, char *e1, char *c2, char *e2) { int COUNT[SIGMA+1]; /* SYMA:0, SYMC:1, SYMG:2, SYMT:3 */ int vc; /* valid character count */ int i; char c; vc = 0; for (i=0; i<=3; ++i) COUNT[i] = 0; for (i=0; i<=M1; ++i) { c = ET(c1[i],e1[i]); if (c == DASH) return SYMN; else { if ((c != LSTAR) && (c != RSTAR)) { ++vc; ++COUNT[c]; } } } for (i=0; i<=M2; ++i) { c = ET(c2[i],e2[i]); if (c == DASH) return SYMN; else { if ((c != LSTAR) && (c != RSTAR)) { ++vc; ++COUNT[c]; } } } if (vc > 1) { if (vc == 2) { for (i=0; i<=3; ++i) if (COUNT[i] >= vc) return i; } else { for (i=0; i<=3; ++i) if (COUNT[i] >= vc-1) return i; } } return SYMN; } /* back_chain - a backward traverse of the DAG to chain up the patterns */ void back_chain(int I1, int I2) { int i, j, I, J; NODE *np, *np1; CHAIN_NODE *cp; int hs, ds, vs, t, hbs, dbs, vbs, c; NODE *hnext, *dnext, *vnext; char *pal2, *cal2, *e2, *ve2; char ci, cj, pi, pj, ei, ej; int hc, dc, vc; /* backward computation to compute the best pattern chain score */ /* Tie breaking rule: if more than one paths result the maximum pattern score, we take the one with the highest alignment score */ /* Initialize the last row */ for (np=LastF[I2]; np; np=np->blink) { np->hs = 0; np->ds = 0; np->vs = 0; np->hnext = LastF[I2]; np->dnext = LastF[I2]; np->vnext = LastF[I2]; np->hbs = np->h1; np->dbs = np->d1; np->vbs = np->v1; } /* backward computation */ locate_entry(EN,LO,SI,M1,L1,I2,N1); script2col(EN,SI,pal1,M1,L1); for (i=I2-1; i>=I1; --i) { tal1 = cal1; cal1 = pal1; pal1 = tal1; script2ei(EN,SI,e1,M1,L1); backward_script(EN,LO,SI,M1,L1); script2col(EN,SI,pal1,M1,L1); script2hei(EN,SI,he1,M1,L1); cp = Chain[i]; for (np=LastF[i]; np; np=np->blink) { j = np->j; if (np->hlink || np->dlink) { cal2 = AL2[j+1]; e2 = E2[j+1]; } pal2 = AL2[j]; ve2 = VE2[j]; hbs = dbs = vbs = MININT; hs = ds = vs = 0; /* Compute X->h */ if (np1 = np->hlink) { c = 0; for (I=0; I<=M2; ++I) { cj = ET(cal2[I],e2[I]); for (J=0; J<=M1; ++J) c += PD(StoA[L1+J],StoA[L2+I],he1[J],cj); } hc = dc = vc = c; for (I=0; I<=M1; ++I) { pi = pal1[I]; ei = GV(he1[I]); for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hc -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ei][cj]; dc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ei][cj]; vc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ei][cj]; } } if (np->h + np1->h1 + hc >= cut_off) { /* edge from h node to h node */ if ((np1->hs > hs) || ((np1->hs == hs) && (np1->hbs + hc >= hbs))) { hs = np1->hs; hnext = np1; hbs = np1->hbs + hc; } } if (np->d + np1->h1 + dc >= cut_off) { /* edge from d node to h node */ if ((np1->hs > ds) || ((np1->hs == ds) && (np1->hbs + dc >= dbs))) { ds = np1->hs; dnext = np1; dbs = np1->hbs + dc; } } if (np->v + np1->h1 + vc >= cut_off) { /* edge from v node to h node */ if ((np1->hs > vs) || ((np1->hs == vs) && (np1->hbs + vc >= vbs))) { vs = np1->hs; vnext = np1; vbs = np1->hbs + vc; } } } /* Compute X->v */ if (np1 = np->vlink) { c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) c += PD(StoA[L1+I],StoA[L2+J],ci,ve2[J]); } hc = dc = vc = c; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ej = GV(ve2[J]); pj = pal2[J]; hc -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][ej]; dc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][ej]; vc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][ej]; } } if (np->h + np1->v1 + hc >= cut_off) { /* edge from h node to v node */ if ((np1->vs > hs) || ((np1->vs == hs) && (np1->vbs + hc >= hbs))) { hs = np1->vs; hnext = np1; hbs = np1->vbs + hc; } } if (np->d + np1->v1 + dc >= cut_off) { /* edge from d node to v node */ if ((np1->vs > ds) || ((np1->vs == ds) && (np1->vbs + dc >= dbs))) { ds = np1->vs; dnext = np1; dbs = np1->vbs + dc; } } if (np->v + np1->v1 + vc >= cut_off) { /* edge from v node to v node */ if ((np1->vs > vs) || ((np1->vs == vs) && (np1->vbs + vc >= vbs))) { vs = np1->vs; vnext = np1; vbs = np1->vbs + vc; } } } /* Compute X->d */ np->pat_no = -1; if (np1 = np->dlink) { /* substitution edge is in some suboptimal path */ c = 0; for (I=0; I<=M1; ++I) { ci = ET(cal1[I],e1[I]); for (J=0; J<=M2; ++J) { cj = ET(cal2[J],e2[J]); c += PD(StoA[L1+I],StoA[L2+J],ci,cj); } } hc = dc = vc = c; for (I=0; I<=M1; ++I) { ETGV(ci,cal1[I],e1[I]) pi = pal1[I]; for (J=0; J<=M2; ++J) { ETGV(cj,cal2[J],e2[J]) pj = pal2[J]; hc -= PT[StoA[L1+I]][StoA[L2+J]][0][pj!=DASH][ci][cj]; vc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][0][ci][cj]; dc -= PT[StoA[L1+I]][StoA[L2+J]][pi!=DASH][pj!=DASH][ci][cj]; } } if (np->h + np1->d1 + hc >= cut_off) { /* edge from h node to d node */ if ((np1->ds > hs) || ((np1->ds == hs) && (np1->dbs + hc >= hbs))) { hs = np1->ds; hnext = np1; hbs = np1->dbs + hc; } } if (np->d + np1->d1 + dc >= cut_off) { /* edge from d node to d node */ if ((np1->ds > ds) || ((np1->ds == ds) && (np1->dbs + dc >= dbs))) { ds = np1->ds; dnext = np1; dbs = np1->dbs + dc; } } if (np->v + np1->d1 + vc >= cut_off) { /* edge from v node to d node */ if ((np1->ds > vs) || ((np1->ds == vs) && (np1->dbs + vc >= vbs))) { vs = np1->ds; vnext = np1; vbs = np1->dbs + vc; } } while (cp && cp->j == np->j) { /* possible jumps with patterns */ /* printf("i=%d,j=%d\n",i,cp->j); printf("hs=%d, ds=%d, vs=%d\n",hs,ds,vs); */ t = pat_score[cp->pat_no] + cp->np->ds; if (np->h + np1->d1 + hc >= cut_off) { if ((t > hs) || ((t == hs) && (np1->dbs + hc > hbs))) { hs = t; hnext = cp->np; np->pat_no = cp->pat_no; hbs = np1->dbs + hc; } } if (np->d + np1->d1 + dc >= cut_off) { if ((t > ds) || ((t == ds) && (np1->dbs + dc > dbs))) { ds = t; dnext = cp->np; np->pat_no = cp->pat_no; dbs = np1->dbs + dc; } } if (np->v + np1->d1 + vc >= cut_off) { if ((t > vs) || ((t == vs) && (np1->dbs + vc > vbs))) { vs = t; vnext = cp->np; np->pat_no = cp->pat_no; vbs = np1->dbs + vc; } } /* printf("t=%d, hs=%d, ds=%d, vs=%d\n",t,hs,ds,vs); */ cp = cp->link; } } np->hbs = hbs; np->dbs = dbs; np->vbs = vbs; np->hs = hs; np->ds = ds; np->vs = vs; np->hnext = hnext; np->dnext = dnext; np->vnext = vnext; } } } /* output_path - output a path with the maximum pattern score */ void output_path(int I1, int J1, char node1, int I2, int J2, char node2) { NODE *np, *np1; char type; int t; /* start from node1 at (I1,I2) */ np = F[I1]; type = node1; if (type == XNODE) { t = max3(np->hs, np->ds, np->vs); if (t == 0) { t = max3(np->h1,np->d1,np->v1); if (np->h1 == t) type = HNODE; else if (np->d1 == t) type = DNODE; else type = VNODE; } else { if (np->hs == t) type = HNODE; else if (np->ds == t) type = DNODE; else type = VNODE; } } if (type == HNODE) printf("best pattern score (HNODE) = %d\n",np->hs); else if (type == DNODE) printf("best pattern score (DNODE) = %d\n",np->ds); else printf("best pattern score (VNODE) = %d\n",np->vs); /* printf("h=%d, d=%d, v=%d, h1=%d, d1=%d, v1=%d\n",np->h,np->d,np->v,np->h1,np->d1,np->v1); printf("h+h1=%d, d+d1=%d, v+v1=%d\n",np->h+np->h1,np->d+np->d1,np->v+np->v1); fflush(stdout); */ while ((np->i < I2) || (np->j < J2)) { if (type == HNODE) np1 = np->hnext; else if (type == DNODE) np1 = np->dnext; else np1 = np->vnext; if (np->i == np1->i) { /* -> h node */ t = np1->j - np->j; /* printf("insert %d\n",t); */ INS(t) type = HNODE; } else if (np->j == np1->j) { /* -> v node */ t = np1->i - np->i; /* printf("delete %d\n",t); */ DEL(t) type = VNODE; } else { /* -> d node */ /* printf("replace %d\n",np1->i-np->i); */ for (t=np->i; ti; ++t) REP type = DNODE; /* if (np->pat_no >= 0) for (t=np->i+1; t<=np1->i; ++t) In_pat[t] = TRUE; */ } np = np1; } } /* max3 : three argument maximum */ int max3 (int x, int y, int z) { if (x>y) if (x>z) return x; else return z; else if (y>z) return y; else return z; }