/* button.c Compile with: gcc -g button.c -lm -o N.data/button gcc -g button.c -DANCHOR -lm -o A.data/button Example run: button beta_reference S=6 Initiates phylogen test runs for a fixed specified min_length (S) and values of fixed anchor between 0 and 100 (no flexible anchor case, N) and -100 and 100 (flexible anchor=#active sequences case, A) and for each run computes the # of false negatives/positives with respect to a reference set of landmarks. The intervals were chosen to cover the effective range [0,d-1] (case N) and [-d+1,-1] (case A), where d=#sequences in the alignment and d-1 = maximum phylogenetic distance for an alignment of d sequences. These are the ranges in which changes occur. Tests were performed for the gap-free (strict) version only. NOTE: Names of alignment files/paths may need to be changed. Look for LLL to locate the references. */ #include #include #include #include #include #include #define Usage "%s [S=] [L=] [U=]\n" void fatal(char *message); void fatalf(char *msg, char *val); FILE *ckopen(char *name, char *mode); void *ckalloc(size_t amount); typedef struct data { char filename[50]; int nfp; int nfn; } data_t; void run_phylogen(double low, double up, double epsilon, int flag, data_t *low_data, data_t *up_data); void run_test(double anchor, data_t *data); void compare(char *filename, data_t *data); #ifndef ANCHOR #define DEFAULT_LOWER_BOUND 0.00 #define DEFAULT_UPPER_BOUND 100.00 #else #define DEFAULT_LOWER_BOUND -100.00 #define DEFAULT_UPPER_BOUND 100.00 #endif #define DEFAULT_REFINE .001 #define DEFAULT_LENGTH 6 #define LOWER 0x1 #define UPPER 0x2 int *Fct, *Regs, To, From, Length, len; int main(int argc, char *argv[]) { int to, from, i; double low, up, eps; data_t low_data, up_data; char buffer[200]; FILE *fp; if ((argc<3) || (argc>5)) fatalf(Usage,argv[0]); Length = DEFAULT_LENGTH; low = DEFAULT_LOWER_BOUND; up = DEFAULT_UPPER_BOUND; eps = DEFAULT_REFINE; while (argc>2) { if (!strncmp(argv[argc-1],"S=",2)) Length = atoi(argv[argc-1]+2); else if (!strncmp(argv[argc-1],"L=",2)) low = atof(argv[argc-1]+2); else if (!strncmp(argv[argc-1],"U=",2)) up = atof(argv[argc-1]+2); else fatalf(Usage,argv[0]); argc--; } /* read reference file */ fp = ckopen(argv[1],"r"); len = 0; fgets(buffer,200,fp); if (buffer[0]!='>') fatal("Wrong file configuration."); sscanf(buffer+1, "%d %d", &From, &To); Fct = (int *)ckalloc((To-From+1)*sizeof(int))-From; Regs = (int *)ckalloc((To-From+1)*sizeof(int))-From; for (i=From; i<=To; Fct[i]=Regs[i]=0, i++); while (fgets(buffer,200,fp)!=NULL) { if (!isdigit(buffer[0])) continue; sscanf(buffer, "%d %d", &from, &to); for (i=from; i<=to; i++) if ((i>=From) && (i<=To)) Fct[i] = 1; len += to-from+1; } fclose(fp); run_phylogen(low, up, eps, LOWER | UPPER, &low_data, &up_data); /* system("sort -nr +7 DEVIATION > araCe_DEVIATION.total"); */ return 0; } void run_phylogen(double low, double up, double epsilon, int flag, data_t *low_data, data_t *up_data) { double mid; data_t mid_data; FILE *fp; if (up-low < epsilon) return; if (flag & LOWER) run_test(low,low_data); if (flag & UPPER) run_test(up, up_data); if ((low_data->nfp!=up_data->nfp) || (low_data->nfn!=up_data->nfn)) { fp = ckopen("DEVIATION","a"); if (flag & LOWER) fprintf(fp, "%-15s \tfp: %d \tfn: %d \ttotal: %d (%d)\n", low_data->filename, low_data->nfp, low_data->nfn, low_data->nfp+low_data->nfn, len); fclose(fp); mid = (up+low)/2.0; run_phylogen(low,mid,epsilon,UPPER,low_data,&mid_data); run_phylogen(mid,up,epsilon,LOWER,&mid_data,up_data); if ((flag & UPPER) && ((up_data->nfp!=mid_data.nfp) || (up_data->nfn!=mid_data.nfn))) fprintf(fp, "%-15s \tfp: %d \tfn: %d \ttotal: %d (%d)\n", up_data->filename, up_data->nfp, up_data->nfn, up_data->nfp+up_data->nfn, len); } else if (flag & LOWER) { fp = ckopen("DEVIATION","a"); fprintf(fp, "%-15s \tfp: %d \tfn: %d \ttotal: %d (%d)\n", low_data->filename, low_data->nfp, low_data->nfn, low_data->nfp+low_data->nfn, len); fclose(fp); } } void run_test(double anchor, data_t *data) { char command[200]; char filename[50]; /* LLL change name of region here */ #ifndef ANCHOR sprintf(filename, "phylogen.araCe.l%d.N.a%1.3f.list", Length, anchor); #else sprintf(filename, "phylogen.araCe.l%d.A.a%1.3f.list", Length, anchor); #endif /* LLL -- example of command line for tests on the beta-globin alignment: sprintf(command,"phylogen alignment -I /usr/local/globin/data/align.beta -I /depot/globin/data -f tree.new -r %d %d -l %d -a 5 -o -v %f > %s", From, To, Length, anchor, filename); */ /* LLL change name of alignment file + tree + paths here */ #ifndef ANCHOR sprintf(command,"/net/finch/usr/local/globin/bin/phylogen araCe.a -I /home/galapagos/florea/test.d -f araCe_tree.def -r %d %d -l %d -a 5 -o -v %f > %s", From, To, Length, anchor, filename); #else sprintf(command,"/net/finch/usr/local/globin/bin/phylogen araCe.a -I /home/galapagos/florea/test.d -f araCe_tree.def -r %d %d -l %d -a 5 -c -v %f > %s", From, To, Length, anchor, filename); #endif system(command); compare(filename, data); return; } void compare(char *filename, data_t *data) { FILE *tfp; int from, to, i; char buffer[200]; tfp = ckopen(filename,"r"); while (fgets(buffer,200,tfp)!=NULL) { if (!isdigit(buffer[0])) continue; sscanf(buffer, "%d %d", &from, &to); for (i=from; i<=to; i++) if ((i>=From) && (i<=To)) Regs[i] = 1; } fclose(tfp); strcpy(data->filename, filename); data->nfn = data->nfp = 0; for (i=From; i<=To; i++) if (!Fct[i] && Regs[i]) data->nfp++; else if (Fct[i] && !Regs[i]) data->nfn++; for (i=From; i<=To; Regs[i++]=0); return; } void fatal(char *msg) { (void)fprintf(stderr, "%s\n", msg); exit(1); } void fatalf(char *msg, char *val) { (void)fprintf(stderr, msg, val); (void)putc('\n', stderr); exit(1); } FILE *ckopen(char *name, char *mode) { FILE *fp; if ((fp = fopen(name, mode)) == NULL) fatalf("Cannot open %s.", name); return fp; } void *ckalloc(size_t amount) { void *p; if ((p = malloc(amount)) == NULL) fatal("Ran out of memory."); return p; }