/* lib.c - library of C procedures. */ #include #include #include /* extern int fprintf(FILE *, char *, ...); extern int printf(char *, ...); extern int sscanf(char *, char *, ...); extern int rewind(FILE *); */ extern int fclose(FILE *); extern int ungetc(int, FILE *); static char *score_file; /* used for score file */ #define MAX_PARAMS 100 static int param_sets = 0; static int eparam_sets = 0; static char *nam1[MAX_PARAMS]; static char *nam2[MAX_PARAMS]; static int mat[MAX_PARAMS]; static int mis[MAX_PARAMS]; static int ope[MAX_PARAMS]; static int ext[MAX_PARAMS]; static char *enam[MAX_PARAMS]; static int lope[MAX_PARAMS]; static int lext[MAX_PARAMS]; static int rope[MAX_PARAMS]; static int rext[MAX_PARAMS]; static int gave_default = 0; static int egave_default = 0; static int default_mat; static int default_mis; static int default_ope; static int default_ext; static int default_lope; static int default_lext; static int default_rope; static int default_rext; /* for reading LAV files */ static FILE *lav_fp; static int seq_no, block_no, first_line; void fatal(); void fatalf(); /* score_name - record the name of the file containing scores */ void score_name(char *name) { char *strsave(); score_file = strsave(name); } static int get_params(); /* get_DNA_scores - get scores, return 1 if successful */ int get_DNA_scores(char *s1, char *s2, int *match, int *mismatch, int *open, int *extend) { int i, strsame(); if (param_sets == 0 && get_params() == 0) return 0; for (i = 0; i < param_sets; ++i) if ((strsame(nam1[i],s1) && strsame(nam2[i],s2)) || (strsame(nam1[i],s2) && strsame(nam2[i],s1))) { *match = mat[i]; *mismatch = mis[i]; *open = ope[i]; *extend = ext[i]; return 1; } if (!gave_default) return 0; *match = default_mat; *mismatch = default_mis; *open = default_ope; *extend = default_ext; return 1; } /* get_DNA_end_scores - get end_gap scores, return 1 if successful */ int get_DNA_end_scores(char *s1, int *lopen, int *lextend, int *ropen, int *rextend) { FILE *ckopen(); int i, strsame(); for (i = 0; i < eparam_sets; ++i) if (strsame(enam[i],s1)) { *lopen = lope[i]; *lextend = lext[i]; *ropen = rope[i]; *rextend = rext[i]; return 1; } if (!egave_default) return 0; *lopen = default_lope; *lextend = default_lext; *ropen = default_rope; *rextend = default_rext; return 1; } /* print_score - print scores */ void print_score() { int i; if (gave_default) param_sets--; printf("\n G GE match mismatch\n"); for (i = 0; i < param_sets; ++i) printf(" %-12s %-12s %3d %3d %3d %3d\n", nam1[i],nam2[i],ope[i],ext[i],mat[i],mis[i]); if (gave_default) printf(" default %3d %3d %3d %3d\n", default_ope,default_ext,default_mat,default_mis); if (egave_default || eparam_sets > 0) { if (egave_default) eparam_sets--; printf("\n Penalties for the end gaps:\n"); printf(" LG LGE RG RGE\n"); for (i = 0; i < eparam_sets; ++i) printf(" %-12s %3d %3d %3d %3d\n", enam[i],lope[i],lext[i],rope[i],rext[i]); if (egave_default) printf(" default %3d %3d %3d %3d\n", default_lope,default_lext,default_rope,default_rext); } } static int get_params() { FILE *fp, *fopen(); char buf[100], name1[100], name2[100], *fgets(), *strsave(); int i, strncmp(); char eflag; if (!score_file) score_file = strsave("Scores"); if ((fp = fopen(score_file, "r")) == NULL) return 0; eflag = 0; while (fgets(buf, 100, fp)) { if (!eflag) { if (buf[0] == '\n' || buf[0] == '\0') eflag = 1; else { if ((i = param_sets++) >= MAX_PARAMS) fatal("Scores file is too big."); if (strncmp("default", buf, 7)) { if (sscanf(buf, "%s %s %d %d %d %d", name1, name2, ope+i, ext+i, mat+i, mis+i) == EOF) fatal("Improper score file."); } else { gave_default = 1; if (sscanf(buf+7, "%d %d %d %d", &default_ope, &default_ext, &default_mat, &default_mis) == EOF) fatal("Improper score file."); } nam1[i] = strsave(name1); nam2[i] = strsave(name2); } } else { if ((i = eparam_sets++) >= MAX_PARAMS) fatal("Scores file is too big."); if (strncmp("default", buf, 7)) { if (sscanf(buf, "%s %d %d %d %d", name1, lope+i, lext+i, rope+i, rext+i) == EOF) fatal("Improper score file."); } else { egave_default = 1; if (sscanf(buf+7, "%d %d %d %d", &default_lope, &default_lext, &default_rope, &default_rext) == EOF) fatal("Improper score file."); } enam[i] = strsave(name1); } } fclose(fp); return 1; } static int ckgetc(FILE *fp); /* get_seq - read a sequence */ int get_seq( char *file_name, char **seqptr, char **nameptr, int *fromptr, int *toptr) { FILE *fp, *ckopen(); char *p, *fgets(), *strchr(), *ckalloc(), *strsave(); int c, length, extract_seq(); long n; /* check for position specification */ if ((p = strchr(file_name, '{')) || (p = strchr(file_name, '[')) || (p = strchr(file_name, '('))) { sscanf(p+1, "%d,%d", fromptr, toptr); if (*fromptr <= 0 || *fromptr > *toptr) fatalf("%s: Improper positions specification.", file_name); c = *p; *p = '\0'; *nameptr = strsave(file_name); *p = c; return extract_seq(*nameptr, seqptr, *fromptr, *toptr); } fp = ckopen(file_name, "r"); for (n = 0; (c = getc(fp)) != EOF; ) if (c != '\n') ++n; *seqptr = ckalloc((n+1)*sizeof(char)); if (n == 0) return 0; rewind(fp); for (p = *seqptr; ; ) { if (fgets(p, (int)n, fp) == NULL) { fclose(fp); *p = '\0'; break; } if (p == *seqptr && !isalpha(*p)) continue; while (isalpha(*p)) *p++ = toupper(*p); if (*p != '\n' && *p != '\0') { fprintf(stderr, "Illegal character %c in file %s.\n", *p, file_name); exit(1); } } length = p - *seqptr; /* defaults: */ *nameptr = file_name; *fromptr = 1; *toptr = length; return length; } int extract_seq(char *file_name, char **seqptr, int from, int to) { FILE *fp, *ckopen(); int i, c, n = to - from + 1; char *p, *ckalloc(); *seqptr = ckalloc((n+1)*sizeof(char)); fp = ckopen(file_name, "r"); while (!isalpha(c = ckgetc(fp))) /* flush non-sequence lines */ while (c != '\n') c = ckgetc(fp); ungetc(c, fp); for (i = 0; i < from-1; ) if (isalpha(ckgetc(fp))) ++i; for (p = *seqptr; i < to; ) if (isalpha(c = ckgetc(fp))) { *p++ = c; ++i; } *p = '\0'; return n; } static int ckgetc(FILE *fp) { int c; if ((c = getc(fp)) == EOF) fatal("Unexpected end of sequence file."); if (islower(c)) c = toupper(c); return c; } int is_DNA(char *s) /* guess if sequence represents a DNA sequence */ { int len, ACGT; char *strchr(); len = ACGT = 0; for ( ; *s; ++s) { ++len; if (strchr("ACGT", *s)) ++ACGT; } return 3*len < 4*ACGT; } /* lav_read_init - intialization for reading an alignment file */ void lav_read_init(FILE *fp) { seq_no = 0; block_no = 0; lav_fp = fp; first_line = 1; } /* get_seqname - get a sequence name from an alignment file */ char *get_seqname(int *fromptr, int *toptr, int echo) { char buf[100], *s, *fgets(), *ckalloc(), *strsave(); char *strtok(), *seqname; int atoi(), strcmp(); if (seq_no == 0) while (fgets(buf, 100, lav_fp)) { if (first_line) if (strcmp(buf, "#:lav\n")) fatal("Not a lav file."); else first_line = 0; if (echo) printf("%s",buf); if (buf[0] == 's') break; } if (fgets(buf, 100, lav_fp)) { if (echo) printf("%s",buf); if((s = strtok(buf,"\" \t\n\0")) == NULL) return NULL; seqname = strsave(s); if((s = strtok(NULL,"\" \t\n\0")) == NULL) return NULL; *fromptr = atoi(s); if((s = strtok(NULL,"\" \t\n\0")) == NULL) return NULL; *toptr = atoi(s); seq_no++; return seqname; } else return NULL; } /* get_block - get a block from an alignment file */ int get_block(int **block, int echo) { char buf[100], tbuf[100], *s, *fgets(), *ckalloc(), *strsave(); char *strtok(), *strncpy(); int i, j, atoi(), strsame(); int *bp; char lflag; lflag = 0; while (fgets(buf, 100, lav_fp)) { strncpy(tbuf,buf,100); s = strtok(buf,"\" \t\n\0"); if(s != NULL && strsame(s,"l")) { lflag = 1; break; } if (echo) printf("%s",tbuf); } if(lflag) { *block = bp = (int *) ckalloc(2*seq_no*sizeof(int)); for (i=0; i<=1; ++i) for (j=0; j