/* A PACKAGE FOR CONSTRAINED SEQUENCE COMPARISON WITH AFFINE WEIGHTS (SAVE COST_ONLY VECTORS FOR EACH PARTITION ROW) */ /* Globally passed params and macros */ #include #define DIGIT 10.0 #define MININT -999999 #define max(x,y) ((x) >= (y) ? (x) : (y)) #define min(x,y) ((x) <= (y) ? (x) : (y)) #define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */ #define FORWARD \ { if ((c = c - m) > (e = e - h)) e = c; \ if ((c = CC[j] - m) > (d = DD[j] - h)) d = c; \ c = s + wa[B[j]]; \ if (e > c) c = e; \ if (d > c) c = d; \ s = CC[j]; \ CC[j] = c; \ DD[j] = d; \ } #define BACKWARD \ { c = s + wa[B[j+1]]; \ d = SS[j]-h; \ if (e-m > c) c = e-m; \ if (d-g > c) c = d-g; \ if (c > d) d = c; \ if (c > (e = e-h)) e = c; \ s = RR[j]; \ RR[j] = c; \ SS[j] = d; \ } #define F_ONEROW(i,k) \ { wa = w[A[i]]; \ l = max(J1,L[i]); \ if (l > max(J1,L[i-1])) { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ c = CC[l-1]+wa[B[l]]; \ } else { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ } \ if (d > c) c = d; \ c = max(c,VC[i]); \ s = CC[l]; \ CC[l] = c; \ DD[l] = d; \ e = max(VE[i],c-g); \ for (j = l+1; j <= k; j++) FORWARD; \ } #define LF_ONEROW(i,k) \ { wa = w[A[i]]; \ l = max(J1,L[i]); \ if (l > max(J1,L[i-1])) { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ c = CC[l-1]+wa[B[l]]; \ } else { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ } \ if (d > c) c = d; \ s = CC[l]; \ CC[l] = c; \ DD[l] = d; \ e = c-g; \ for (j = l+1; j <= k; j++) FORWARD; \ } #define DF_ONEROW(i,k) \ { wa = w[A[i]]; \ l = L[i]; \ if (l > L[i-1]) { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ c = CC[l-1]+wa[B[l]]; \ } else { \ if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; \ } \ if (d > c) c = d; \ s = CC[l]; \ CC[l] = c; \ DD[l] = d; \ e = c-g; \ for (j = l+1; j <= k; j++) FORWARD; \ } #define B_ONEROW(i,k) \ { wa = w[A[i+1]]; \ r = min(J2,R[i]); \ if (r < min(J2,R[i+1])) { \ if ((c = RR[r+1] + wa[B[r+1]]) < SS[r] - m) c = SS[r] - m; \ if (c > (d = SS[r] - h)) d = c; \ } else { \ d = SS[r] - h; \ c = d-g; \ } \ s = RR[r]; \ RR[r] = c; \ SS[r] = d; \ e = c; \ for (j = r-1; j >= k; --j) BACKWARD \ } #define LB_ONEROW(i,k) \ { wa = w[A[i+1]]; \ r = min(J2,R[i]); \ if (r < min(J2,R[i+1])) { \ if ((c = RR[r+1] + wa[B[r+1]]) < SS[r] - m) c = SS[r] - m; \ if (c > (d = SS[r] - h)) d = c; \ } else { \ d = SS[r] - h; \ c = d-g; \ } \ c = max(VR[i],c); \ s = RR[r]; \ RR[r] = c; \ SS[r] = d; \ e = max(VT[i],c); \ for (j = r-1; j >= k; --j) BACKWARD \ } #define DB_ONEROW(i,k) \ { wa = w[A[i+1]]; \ r = R[i]; \ if (r < R[i+1]) { \ if ((c = RR[r+1] + wa[B[r+1]]) < SS[r] - m) c = SS[r] - m; \ if (c > (d = SS[r] - h)) d = c; \ } else { \ d = SS[r] - h; \ c = d-g; \ } \ s = RR[r]; \ RR[r] = c; \ SS[r] = d; \ e = c; \ for (j = r-1; j >= k; --j) BACKWARD \ } static long (*w)[128]; /* w = W */ static long g, h, m; /* g = G, h = H, m = g+h */ static long *CC, *DD; /* Forward cost-only vectors */ static long *RR, *SS; /* Reverse cost-only vectors */ static long *ICC, *IDD; /* Forward cost-only initial vectors */ static long *IRR, *ISS; /* Reverse cost-only initial vectors */ static long *VC, *VE; /* Rightward cost-only vectors */ static long *VR, *VT; /* Leftward cost-only vectors */ static long *IVC, *IVE; /* Rightward cost-only initial vectors */ static long *IVR, *IVT; /* Leftward cost-only initial vectors */ static long cutoff; static char *A, *B; static long **PCC, **PDD, **PRR, **PSS; /* Partition cost_only vectors */ static char *cut; static long opt; static long right_locate(I1,J1,I2,J2,d1,L,R) int I1,J1,I2,J2; long d1; int L[], R[]; { register int i, j; int k; register long c, e, d, s; long t, *wa; int l, r; int midi, midi1, midj, midj1; long d2, tr, ts; char type; /* Boundary cases */ /* printf("I1=%d, J1=%d, I2=%d, J2=%d\n",I1,J1,I2,J2); */ if (I2 <= I1) { R[I1] = J2; return 0; } if (J2 <= J1) { for (i = I1; i <= I2; ++i) R[i] = J2; return 0; } for (j = J1; j <= J2; ++j) { RR[j] = IRR[j]; SS[j] = ISS[j]; } for (i = I1; i <= I2; ++i) { VC[i] = IVC[i]; VE[i] = IVE[i]; } midi = (I1+I2)/2; midi1 = midi+1; DD[J1] = d1; CC[J1] = t = VC[I1]; t = max(VE[I1],t-g); r = min(J2,R[I1]); for (j = J1+1; j <= r; ++j) { CC[j] = t = t-h; DD[j] = t-g; } for (j = r+1; j <= J2; ++j) { CC[j] = DD[j] = MININT; } for (i = I1+1; i <= midi; i++) { k = min(J2,R[i]); F_ONEROW(i,k) } for (i = I2-1; i >= midi1; --i) { k = max(J1,L[i]); B_ONEROW(i,k) } l = max(J1,L[midi1]); r = min(J2,R[midi]); wa = w[A[midi1]]; midj = MININT; if ((l > max(J1,L[midi])) && (CC[l-1] + RR[l] + wa[B[l]] >= cutoff)) { midj = l; type = 1; } if ((CC[l] + SS[l] - m >= cutoff) || (DD[l] + SS[l] - h >= cutoff)) { midj = l; type = 2; } for (j = l+1; j <= r; ++j) { if (CC[j-1] + RR[j] + wa[B[j]] >= cutoff) { midj = j; type = 1; } if ((CC[j] + SS[j] - m >= cutoff) || (DD[j] + SS[j] - h >= cutoff)) { midj = j; type = 2; } } if ((r < min(J2,R[midi1])) && (CC[r] + RR[r+1] + wa[B[r+1]] >= cutoff)) { midj = r+1; type = 1; } if (midj == MININT) return -1; /* printf("i= %d, midj = %d\n",i,midj); */ if (type == 1) midj1 = midj - 1; else midj1 = midj; d2 = max(CC[midj]-m,DD[midj]-h); k = max(J1,L[midi]); B_ONEROW(midi,k) tr = IRR[midj]; ts = ISS[midj]; for (j = max(J1,L[midi]); j <= midj1; ++j) { IRR[j] = RR[j]; ISS[j] = SS[j]; } for (i = midi1; (i <= I2) && (L[i] <= midj); ++i) { F_ONEROW(i,midj) IVC[i] = c; IVE[i] = e; } right_locate(I1,J1,midi,midj1,d1,L,R); IRR[midj] = tr; ISS[midj] = ts; right_locate(midi1,midj,I2,J2,d2,L,R); return 0; } static long left_locate(I1,J1,I2,J2,s2,L,R) int I1,J1,I2,J2; long s2; int L[], R[]; { register int i, j; int k; register long c, e, d, s; long t, *wa; int l, r; int midi, midi1, midj, midj1; long s1, tc, td; char type; /* Boundary cases */ /* printf("I1=%d, J1=%d, I2=%d, J2=%d\n",I1,J1,I2,J2); */ if (I2 <= I1) { L[I1] = J1; return 0; } if (J2 <= J1) { for (i = I1; i <= I2; ++i) { /* printf("i = %d, L = %d, J1= %d\n",i,L[i],J1); */ L[i] = J1; } return 0; } for (j = J1; j <= J2; ++j) { CC[j] = ICC[j]; DD[j] = IDD[j]; } for (i = I1; i <= I2; ++i) { VR[i] = IVR[i]; VT[i] = IVT[i]; } midi = (I1+I2)/2; midi1 = midi+1; for (i = I1+1; i <= midi; i++) { k = min(J2,R[i]); LF_ONEROW(i,k) } l = max(J1,L[I2]); SS[J2] = s2; RR[J2] = VR[I2]; t = max(VT[I2],VR[I2]) - g; for (j = J2-1; j >= l; --j) RR[j] = SS[j] = t = t-h; for (j = l-1; j >= J1; --j) RR[j] = SS[j] = MININT; for (i = I2-1; i >= midi1; --i) { k = max(J1,L[i]); LB_ONEROW(i,k) } l = max(J1,L[midi1]); r = min(J2,R[midi]); wa = w[A[midi1]]; midj = MININT; if ((r < min(J2,R[midi1])) && (CC[r] + RR[r+1] + wa[B[r+1]] >= cutoff)) { midj = r+1; type = 1; } for (j = r; j >= l+1; --j) { if ((CC[j] + SS[j] - m >= cutoff) || (DD[j] + SS[j] - h >= cutoff)) { midj = j; type = 2; } if (CC[j-1] + RR[j] + wa[B[j]] >= cutoff) { midj = j; type = 1; } } if ((CC[l] + SS[l] - m >= cutoff) || (DD[l] + SS[l] - h >= cutoff)) { midj = l; type = 2; } if ((l > max(J1,L[midi])) && (CC[l-1] + RR[l] + wa[B[l]] >= cutoff)) { midj = l; type = 1; } if (midj == MININT) { printf("wrong!!\n"); printf("I1=%d, J1=%d, I2=%d, J2=%d\n",I1,J1,I2,J2); printf("%d, %d, %d, %d\n",L[midi],R[midi],L[midi1],R[midi1]); printf("CC=%d, RR=%d VR=%d\n",CC[r],RR[r+1],VR[I2]); return -1; } /* printf("i= %d, midj = %d\n",i,midj); */ if (type == 1) midj1 = midj - 1; else midj1 = midj; s1 = SS[midj1]-h; k = min(J2,R[midi1]); LF_ONEROW(midi1,k); tc = CC[midj]; td = DD[midj]; for (j = midj+1; j <= min(J2,R[midi1]); ++j) { ICC[j] = CC[j]; IDD[j] = DD[j]; } for (i = midi; (i >= I1) && (R[i] >= midj1); --i) { LB_ONEROW(i,midj1) IVR[i] = c; IVT[i] = e; } s1 = max(s1,IVR[midi]); left_locate(I1,J1,midi,midj1,s1,L,R); ICC[midj] = tc; IDD[midj] = td; left_locate(midi1,midj,I2,J2,s2,L,R); return 0; } /* right_init - initializations for right_locate() */ right_init(M,N,L) int M, N; int L[]; { register int i, j; long t; IVC[0] = 0; IVE[0] = -g; for (i = 1; i <= M; ++i) { IVC[i] = IVE[i] = MININT; } IRR[N] = ISS[N] = 0; t = -g; for (j = N-1; j >= L[M]; --j) { ISS[j] = IRR[j] = t = t-h; } for (j = L[M]-1; j >= 0; --j) ISS[j] = IRR[j] = MININT; } /* left_init - initializations for left_locate() */ left_init(M,N,R) int M, N; int R[]; { register int i, j; long t; IVR[M] = 0; IVT[M] = 0; for (i = M-1; i >= 0; --i) { IVR[i] = IVT[i] = MININT; } ICC[0] = 0; IDD[0] = -g; t = -g; for (j = 1; j <= R[0]; ++j) { ICC[j] = t = t-h; IDD[j] = t - g; } for (j = R[0]+1; j <= N; ++j) ICC[j] = IDD[j] = MININT; } static long forward_score(A,B,M,N,L,R) char *A, *B; int M, N; int L[], R[]; { register int i, j; register long c, e, d, s; long t, *wa; long *pc, *pd; int l; /* Boundary cases: M <= 0 or N == 0 */ if (N <= 0) { return -gap(M); } if (M <= 0) { return -gap(N); } CC[0] = 0; DD[0] = -g; t = -g; for (j = 1; j <= R[0]; j++) { CC[j] = t = t-h; DD[j] = t-g; } for (j = R[0]+1; j <= N; ++j) { CC[j] = DD[j] = MININT; } pc = PCC[0]; pd = PDD[0]; for (j = 0; j <= R[0]; ++j) { pc[j] = CC[j]; pd[j] = DD[j]; } for (i = 1; i <= M; i++) { wa = w[A[i]]; l = L[i]; if (l > L[i-1]) { if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; c = CC[l-1]+wa[B[l]]; } else { if ((c = CC[l]-m) > (d = DD[l]-h)) d = c; } if (d > c) c = d; s = CC[l]; CC[l] = c; DD[l] = d; e = c-g; for (j = l+1; j <= R[i]; j++) FORWARD; if (cut[i] == 1 || cut[i+1] == 1) { pc = PCC[i]; pd = PDD[i]; for (j = l; j <= R[i]; ++j) { pc[j] = CC[j]; pd[j] = DD[j]; } } } return CC[N]; } static long backward_score(A,B,M,N,L,R) char *A, *B; int M, N; int L[], R[]; { register int i, j; register long c, e, d, s; long t, *wa; long *pr, *ps; int r; /* Boundary cases: M <= 0 or N == 0 */ if (N <= 0) { return -gap(M); } if (M <= 0) { return -gap(N); } RR[N] = SS[N] = 0; t = -g; for (j = N-1; j >= L[M]; --j) { SS[j] = RR[j] = t = t-h; } for (j = L[M]-1; j >= 0; --j) SS[j] = RR[j] = MININT; pr = PRR[M]; ps = PSS[M]; for (j = L[M]; j <= N; ++j) { pr[j] = RR[j]; ps[j] = SS[j]; } for (i = M-1; i >= 0; --i) { wa = w[A[i+1]]; r = R[i]; if (r < R[i+1]) { if ((c = RR[r+1] + wa[B[r+1]]) < SS[r] - m) c = SS[r] - m; if (c > (d = SS[r] - h)) d = c; } else { d = SS[r] - h; c = d-g; } s = RR[r]; RR[r] = c; SS[r] = d; e = c; for (j = r-1; j >= L[i]; --j) BACKWARD if (cut[i] == 1 || cut[i+1] == 1) { pr = PRR[i]; ps = PSS[i]; for (j = L[i]; j <= R[i]; ++j) { pr[j] = RR[j]; ps[j] = SS[j]; } } } return RR[0]; } /* Interface and top level of comparator */ long REGION_BOUNDARY(AA,BB,M,N,W,G,H,L,R,CUT_OFF) char AA[],BB[]; int M,N; long W[][128],G,H; int L[], R[]; long CUT_OFF; { int i, j, i0, k; long c, c1, c2, t, d, e, s; char *ckalloc(); int l, r, ll, rr, midj, midj1, l0; long *pc, *pd, *pr, *ps; long *wa; char flag; w = W; g = G; h = H; m = g+h; A = AA; B = BB; cutoff = CUT_OFF; cut = (char *) ckalloc(M+2); j = (N+1) * sizeof(long); CC = (long *) ckalloc(j); DD = (long *) ckalloc(j); RR = (long *) ckalloc(j); SS = (long *) ckalloc(j); ICC = IRR = (long *) ckalloc(j); IDD = ISS = (long *) ckalloc(j); j = (M+1) * sizeof(long); VC = VR = (long *) ckalloc(j); VE = VT = (long *) ckalloc(j); IVC = IVR = (long *) ckalloc(j); IVE = IVT = (long *) ckalloc(j); j = (M+1) * sizeof(long *); PCC = (long **) ckalloc(j); PDD = (long **) ckalloc(j); PRR = (long **) ckalloc(j); PSS = (long **) ckalloc(j); j = (R[0]+1)*sizeof(long); PCC[0] = (long *) ckalloc(j); PDD[0] = (long *) ckalloc(j); PRR[0] = (long *) ckalloc(j); PSS[0] = (long *) ckalloc(j); divide_region(M,L,R); for (i = 1; i <= M; ++i) { if (cut[i] == 1 || cut [i+1] == 1) { l = L[i]; r = R[i]; j = (r-l+1)*sizeof(long); PCC[i] = (long *)ckalloc(j) - l; PDD[i] = (long *)ckalloc(j) - l; PRR[i] = (long *)ckalloc(j) - l; PSS[i] = (long *)ckalloc(j) - l; } } c1 = forward_score(A,B,M,N,L,R); /* OK, do it */ c2 = backward_score(A,B,M,N,L,R); /* printf("c1 = %d\nc2 = %d\n",c1,c2); */ opt = c1; IVC[0] = 0; IVE[0] = -g; for (i = 1; i <= M; ++i) IVC[i] = IVE[i] = MININT; i0 = 0; for (i = 0; i <= M; ++i) { if (cut[i+1] == 1) { /* RIGHT_LOCATE */ ll = L[i]; rr = R[i]; pc = PCC[i]; pd = PDD[i]; pr = PRR[i]; ps = PSS[i]; midj = MININT; flag = 0; for (j = rr; j >= ll && flag == 0; --j) { if ((pc[j] + pr[j] >= cutoff) || (pd[j] + ps[j] >= cutoff)) { midj = j; flag = 1; } } /* printf("i0= %d, i= %d, midj = %d\n",i0,i,midj); */ l0 = L[i0]; for (j = midj; j >= ll; --j) { IRR[j] = pr[j]; ISS[j] = ps[j]; } for (j = ll-1; j >= l0; --j) IRR[j] = ISS[j] = MININT; pc = PCC[i0]; pd = PDD[i0]; pr = PRR[i0]; ps = PSS[i0]; midj1 = MININT; flag = 0; for (j = R[i0]; j >= l0 && flag == 0; --j) { if ((pc[j] + pr[j] >= cutoff) || (pd[j] + ps[j] >= cutoff)) { midj1 = j; flag = 1; } } if (midj1 == MININT) return -1; IVC[i0] = t = pc[midj1]; IVE[i0] = t-g; if (midj1 > l0) { for (j = l0; j <= midj1; ++j) { CC[j] = pc[j]; DD[j] = pd[j]; } for (k = i0+1; (k <= i) && (L[k] <= midj1); ++k) { DF_ONEROW(k,midj1) IVC[k] = c; IVE[k] = e; } } /* printf("i0=%d, i=%d, midj1=%d, midj=%d\n",i0,i,midj1,midj); */ right_locate(i0,midj1,i,midj,pd[midj1],L,R); /* LEFT_LOCATE */ ll = L[i]; rr = R[i]; l0 = L[i0]; pc = PCC[i]; pd = PDD[i]; pr = PRR[i]; ps = PSS[i]; midj = MININT; flag = 0; fflush(stdout); for (j = ll; j <= rr && flag == 0; ++j) { if ((pc[j] + pr[j] >= cutoff) || (pd[j] + ps[j] >= cutoff)) { midj = j; flag = 1; } } if (midj == MININT) return -1; IVT[i] = IVR[i] = pr[midj]; for (k = i-1; k >= i0; --k) IVR[k] = IVT[k] = MININT; if (midj < rr) { for (j = midj; j <= rr; ++j) { RR[j] = pr[j]; SS[j] = ps[j]; } for (k = i-1; (k >= i0) && (R[k] >= midj); --k) { DB_ONEROW(k,midj) IVR[k] = c; IVT[k] = e; } } pc = PCC[i0]; pd = PDD[i0]; pr = PRR[i0]; ps = PSS[i0]; midj1 = MININT; flag = 0; for (j = l0; j <= R[i0] && flag == 0; ++j) { if ((pc[j] + pr[j] >= cutoff) || (pd[j] + ps[j] >= cutoff)) { midj1 = j; flag = 1; } } for (j = midj1; j <= R[i0]; ++j) { ICC[j] = pc[j]; IDD[j] = pd[j]; } for (j = R[i0]+1; j <= midj; ++j) ICC[j] = IDD[j] = MININT; /* printf("i=%d, midj=%d, midj1=%d, s=%d\n",i,midj1,midj,PSS[i][midj]); printf("i0=%d, i=%d, midj1=%d, midj=%d\n",i0,i,midj1,midj); */ c = left_locate(i0,midj1,i,midj,PSS[i][midj],L,R); i0 = i+1; } } for (i = 0; i <= M; ++i) printf("%d %d\n",L[i],R[i]); return c1; } /* divide_region - divide the region into several subregions which can be covered by some rectangles whose total area is no more than 2 * region area, and total perimeter is linear to sum of two sequences's length */ divide_region(M, LB, RB) int M; int LB[], RB[]; { int i, i0, sw, w; cut[0] = 1; for (i = 1; i <= M; ++i) cut[i] = 0; i0 = 0; sw = RB[0] - LB[0] + 1; for (i = 1; i <= M; ++i) { w = RB[i] - LB[i] + 1; sw += w; if ((RB[i] - LB[i0] + 1) * (i - i0 + 1) > 2 * sw) { cut[i] = 1; i0 = i; sw = w; /* printf("%d\n",i); */ } } cut[M+1] = 1; }