diff --git a/Makefile b/Makefile index 09199aa1c..a158a126a 100644 --- a/Makefile +++ b/Makefile @@ -231,7 +231,7 @@ bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h) $(htslib_synced_bcf call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h variantkey_h = variantkey.h hex.h convert_h = convert.h $(htslib_vcf_h) -tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h) +tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h) $(bcftools_h) filter_h = filter.h $(htslib_vcf_h) gvcf_h = gvcf.h $(bcftools_h) khash_str2str_h = khash_str2str.h $(htslib_khash_h) diff --git a/abuf.c b/abuf.c index b125679b9..98f133f71 100644 --- a/abuf.c +++ b/abuf.c @@ -721,7 +721,7 @@ static inline int _is_acgtn(char *seq) { while ( *seq ) { - char c = toupper(*seq); + char c = toupper_c(*seq); if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 0; seq++; } diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 975504f8a..b48509a24 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -36,6 +36,9 @@ DEALINGS IN THE SOFTWARE. */ #include KSORT_INIT_GENERIC(uint32_t) +// Avoid having to include all of bcftools.h +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + #define MINUS_CONST 0x10000000 #define MAX_TYPES 64 @@ -89,8 +92,8 @@ inline int est_indelreg(int pos, const char *ref, int l, char *ins4) int i, j, max = 0, max_i = pos, score = 0; l = abs(l); for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { - if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; - else score += (toupper(ref[i]) != toupper(ref[pos+1+j%l]))? -10 : 1; + if (ins4) score += (toupper_c(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; + else score += (toupper_c(ref[i]) != toupper_c(ref[pos+1+j%l]))? -10 : 1; if (score < 0) break; if (max < score) max = score, max_i = i; } diff --git a/bam_sample.c b/bam_sample.c index d8c10b8b3..aa1499868 100644 --- a/bam_sample.c +++ b/bam_sample.c @@ -295,18 +295,18 @@ int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &ori); escaped = 0; ptr++; } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &ren); escaped = 0; ptr++; @@ -343,18 +343,18 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld1); escaped = 0; ptr++; } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld2); escaped = 0; ptr++; @@ -362,11 +362,11 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file) } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld3); escaped = 0; ptr++; diff --git a/bcftools.h b/bcftools.h index 5a4071d96..a06ce60d0 100644 --- a/bcftools.h +++ b/bcftools.h @@ -31,6 +31,7 @@ THE SOFTWARE. */ #include #include #include +#include #define FT_TAB_TEXT 0 // custom tab-delimited text file #define FT_GZ 1 @@ -184,4 +185,20 @@ static inline int get_unseen_allele(bcf1_t *line) return 0; } +// wrappers, borrowed from htslib's textutils_internal.h +// The functions operate on ints such as are returned by fgetc(), +// i.e., characters represented as unsigned-char-valued ints, or EOF. +// To operate on plain chars (and to avoid warnings on some platforms), +// technically one must cast to unsigned char everywhere (see CERT STR37-C) +// or less painfully use these *_c() functions that operate on plain chars +// (but not EOF, which must be considered separately where it is applicable). +static inline int isalnum_c(char c) { return isalnum((unsigned char) c); } +static inline int isalpha_c(char c) { return isalpha((unsigned char) c); } +static inline int isdigit_c(char c) { return isdigit((unsigned char) c); } +static inline int isprint_c(char c) { return isprint((unsigned char) c); } +static inline int ispunct_c(char c) { return ispunct((unsigned char) c); } +static inline int isspace_c(char c) { return isspace((unsigned char) c); } +static inline char tolower_c(char c) { return tolower((unsigned char) c); } +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + #endif diff --git a/consensus.c b/consensus.c index 166a4de16..c95753c27 100644 --- a/consensus.c +++ b/consensus.c @@ -339,7 +339,7 @@ static void destroy_data(args_t *args) static void init_region(args_t *args, char *line) { char *ss, *se = line; - while ( *se && !isspace(*se) && *se!=':' ) se++; + while ( *se && !isspace_c(*se) && *se!=':' ) se++; hts_pos_t from = 0, to = 0; char tmp = 0, *tmp_ptr = NULL; if ( *se ) @@ -353,7 +353,7 @@ static void init_region(args_t *args, char *line) from--; ss = ++se; to = strtol(ss,&se,10); - if ( ss==se || (*se && !isspace(*se)) ) { from = 0; to = 0; } + if ( ss==se || (*se && !isspace_c(*se)) ) { from = 0; to = 0; } else to--; } } @@ -500,9 +500,9 @@ static void mark_ins(char *ref, char *alt, char mark) { int i, nref = strlen(ref), nalt = strlen(alt); if ( mark==TO_LOWER ) - for (i=nref; iprev_base_pos==rec->pos && toupper(ref_allele[0])==toupper(args->prev_base) ) + if ( args->prev_base_pos==rec->pos && toupper_c(ref_allele[0])==toupper_c(args->prev_base) ) { if ( rec->rlen==1 ) fail = 0; else if ( !strncasecmp(ref_allele+1,args->fa_buf.s+idx+1,rec->rlen-1) ) fail = 0; @@ -992,11 +987,11 @@ static void apply_variant(args_t *args, bcf1_t *rec) } int safe_idx = idx<0 ? 0 : idx; // idx can be negative in case of overlapping deletion - args->fa_case = toupper(args->fa_buf.s[safe_idx])==args->fa_buf.s[safe_idx] ? TO_UPPER : TO_LOWER; + args->fa_case = toupper_c(args->fa_buf.s[safe_idx])==args->fa_buf.s[safe_idx] ? TO_UPPER : TO_LOWER; if ( args->fa_case==TO_UPPER ) - for (i=0; imark_ins && len_diff>0 ) mark_ins(ref_allele, alt_allele, args->mark_ins); @@ -1080,9 +1075,9 @@ static void mask_region(args_t *args, char *seq, int len) if ( idx_start < 0 ) idx_start = 0; if ( idx_end >= len ) idx_end = len - 1; if ( mask->with==MASK_UC ) - for (j=idx_start; j<=idx_end; j++) seq[j] = toupper(seq[j]); + for (j=idx_start; j<=idx_end; j++) seq[j] = toupper_c(seq[j]); else if ( mask->with==MASK_LC ) - for (j=idx_start; j<=idx_end; j++) seq[j] = tolower(seq[j]); + for (j=idx_start; j<=idx_end; j++) seq[j] = tolower_c(seq[j]); else for (j=idx_start; j<=idx_end; j++) seq[j] = mask->with; } @@ -1124,7 +1119,7 @@ static void consensus(args_t *args) args->fa_src_pos += str.l; // determine if uppercase or lowercase is used in this fasta file - if ( args->fa_case==-1 ) args->fa_case = toupper(str.s[0])==str.s[0] ? 1 : 0; + if ( args->fa_case==-1 ) args->fa_case = toupper_c(str.s[0])==str.s[0] ? 1 : 0; if ( args->mask ) mask_region(args, str.s, str.l); kputs(str.s, &args->fa_buf); diff --git a/convert.c b/convert.c index 43cdf507e..ad5bc4043 100644 --- a/convert.c +++ b/convert.c @@ -1461,7 +1461,7 @@ static int parse_subscript(char **p) char *q = *p; if ( *q!='{' ) return -1; q++; - while ( *q && *q!='}' && isdigit(*q) ) q++; + while ( *q && *q!='}' && isdigit_c(*q) ) q++; if ( *q!='}' ) return -1; int idx = atoi((*p)+1); *p = q+1; @@ -1474,7 +1474,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) if ( is_vcf_column ) p++; char *q = ++p; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; kstring_t str = {0,0,0}; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); @@ -1517,7 +1517,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) } p = ++q; str.l = 0; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); fmt_t *fmt = register_tag(convert, str.s, is_gtf, T_INFO); @@ -1567,7 +1567,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) { p = ++q; str.l = 0; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); fmt_t *fmt = register_tag(convert, str.s, is_gtf, T_INFO); diff --git a/csq.c b/csq.c index 9572fd32b..3007ef3d1 100644 --- a/csq.c +++ b/csq.c @@ -2844,7 +2844,7 @@ static int sanity_check_ref(args_t *args, gf_tscript_t *tr, bcf1_t *rec) int i = 0; while ( ref[i] && vcf[i] ) { - if ( ref[i]!=vcf[i] && toupper(ref[i])!=toupper(vcf[i]) ) + if ( ref[i]!=vcf[i] && toupper_c(ref[i])!=toupper_c(vcf[i]) ) { if ( !args->force ) error("Error: the fasta reference does not match the VCF REF allele at %s:%"PRId64" .. fasta=%c vcf=%c\n", diff --git a/filter.c b/filter.c index 75d74b77b..686bace32 100644 --- a/filter.c +++ b/filter.c @@ -183,15 +183,15 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok) static int filters_next_token(char **str, int *len) { char *tmp = *str; - while ( *tmp && isspace(*tmp) ) tmp++; + while ( *tmp && isspace_c(*tmp) ) tmp++; *str = tmp; *len = 0; // test for doubles: d.ddde[+-]dd - if ( isdigit(*str[0]) || *str[0]=='.' ) // strtod would eat +/- + if ( isdigit_c(*str[0]) || *str[0]=='.' ) // strtod would eat +/- { double HTS_UNUSED v = strtod(*str, &tmp); - if ( *str!=tmp && (!tmp[0] || !isalnum(tmp[0])) ) + if ( *str!=tmp && (!tmp[0] || !isalnum_c(tmp[0])) ) { *len = tmp - (*str); return TOK_VAL; @@ -246,7 +246,7 @@ static int filters_next_token(char **str, int *len) if ( tmp[0]=='@' ) // file name { - while ( *tmp && !isspace(*tmp) && *tmp!='=' && *tmp!='!' ) tmp++; + while ( *tmp && !isspace_c(*tmp) && *tmp!='=' && *tmp!='!' ) tmp++; *len = tmp - (*str); return TOK_VAL; } @@ -258,7 +258,7 @@ static int filters_next_token(char **str, int *len) { if ( tmp[0]=='"' ) break; if ( tmp[0]=='\'' ) break; - if ( isspace(tmp[0]) ) break; + if ( isspace_c(tmp[0]) ) break; if ( tmp[0]=='<' ) break; if ( tmp[0]=='>' ) break; if ( tmp[0]=='=' ) break; @@ -3202,7 +3202,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok) for (i=0; ihash,list[i]) ) khash_str2int_inc(tok->hash,list[i]); @@ -3541,7 +3541,7 @@ static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks) static void str_to_lower(char *str) { - while ( *str ) { *str = tolower(*str); str++; } + while ( *str ) { *str = tolower_c(*str); str++; } } static int perl_exec(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack) { @@ -3606,7 +3606,7 @@ static int perl_exec(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack static void perl_init(filter_t *filter, char **str) { char *beg = *str; - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) return; if ( strncasecmp("perl:", beg, 5) ) return; #if ENABLE_PERL_FILTERS @@ -3840,7 +3840,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error if ( ret == TOK_PERLSUB ) { - while ( *beg && ((isalnum(*beg) && !ispunct(*beg)) || *beg=='_') ) beg++; + while ( *beg && ((isalnum_c(*beg) && !ispunct_c(*beg)) || *beg=='_') ) beg++; if ( *beg!='(' ) error("[%s:%d] Could not parse the expression: %s\n", __FILE__,__LINE__,str); // the subroutine name @@ -3900,7 +3900,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error } else if ( !len ) // all tokes read or an error { - if ( *tmp && !isspace(*tmp) ) error("Could not parse the expression: [%s]\n", str); + if ( *tmp && !isspace_c(*tmp) ) error("Could not parse the expression: [%s]\n", str); break; // all tokens read } else // TOK_VAL: annotation name or value diff --git a/mcall.c b/mcall.c index 13383787e..ced4f53de 100644 --- a/mcall.c +++ b/mcall.c @@ -31,6 +31,9 @@ THE SOFTWARE. */ #include "call.h" #include "prob1.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + // Using priors for GTs does not seem to be mathematically justified. Although // it seems effective in removing false calls, it also flips a significant // proportion of HET genotypes. Better is to filter by FORMAT/GQ using @@ -308,10 +311,10 @@ static void init_sample_groups(call_t *call) for (i=0; isample_groups,lines[i]); char *tmp = ptr; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]); *tmp = 0; int ismpl = bcf_hdr_id2int(call->hdr, BCF_DT_SAMPLE, lines[i]); diff --git a/mpileup.c b/mpileup.c index 00d21dbe5..8afd9edd9 100644 --- a/mpileup.c +++ b/mpileup.c @@ -1074,7 +1074,7 @@ int read_file_list(const char *file_list,int *n,char **argv[]) { // allow empty lines and trailing spaces len = strlen(buf); - while ( len>0 && isspace(buf[len-1]) ) len--; + while ( len>0 && isspace_c(buf[len-1]) ) len--; if ( !len ) continue; // check sanity of the file list @@ -1084,7 +1084,7 @@ int read_file_list(const char *file_list,int *n,char **argv[]) // no such file, check if it is safe to print its name int i, safe_to_print = 1; for (i=0; itmp_str.l = 0; kputsn(ss,se-ss,&ploidy->tmp_str); @@ -102,7 +102,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg } ss = se; - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; if ( !*se ) error("Could not parse: %s\n", line); sp->ploidy = strtol(ss,&se,10); if ( ss==se ) error("Could not parse: %s\n", line); @@ -163,13 +163,13 @@ ploidy_t *ploidy_init_string(const char *str, int dflt) const char *ss = str; while ( *ss ) { - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; const char *se = ss; while ( *se && *se!='\r' && *se!='\n' ) se++; tmp.l = 0; kputsn(ss, se-ss, &tmp); regidx_insert(pld->idx,tmp.s); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; } free(tmp.s); diff --git a/plugins/fill-tags.c b/plugins/fill-tags.c index 473bc1580..c803e617c 100644 --- a/plugins/fill-tags.c +++ b/plugins/fill-tags.c @@ -171,14 +171,14 @@ void parse_samples(args_t *args, char *fname) // NA12400 GRP1 // NA18507 GRP1,GRP2 char *pop_names = str.s + str.l - 1; - while ( pop_names >= str.s && isspace(*pop_names) ) pop_names--; + while ( pop_names >= str.s && isspace_c(*pop_names) ) pop_names--; if ( pop_names <= str.s ) error("Could not parse the file: %s\n", str.s); pop_names[1] = 0; // trailing spaces - while ( pop_names >= str.s && !isspace(*pop_names) ) pop_names--; + while ( pop_names >= str.s && !isspace_c(*pop_names) ) pop_names--; if ( pop_names <= str.s ) error("Could not parse the file: %s\n", str.s); char *smpl = pop_names++; - while ( smpl >= str.s && isspace(*smpl) ) smpl--; + while ( smpl >= str.s && isspace_c(*smpl) ) smpl--; if ( smpl <= str.s+1 ) error("Could not parse the file: %s\n", str.s); smpl[1] = 0; smpl = str.s; diff --git a/plugins/fixploidy.c b/plugins/fixploidy.c index a6bc7757e..2cb6afbd1 100644 --- a/plugins/fixploidy.c +++ b/plugins/fixploidy.c @@ -87,11 +87,11 @@ void set_samples(char *fname, bcf_hdr_t *hdr, ploidy_t *ploidy, int *sample2sex) while ( hts_getline(fp, KS_SEP_LINE, &tmp) > 0 ) { char *ss = tmp.s; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) error("Could not parse: %s\n", tmp.s); if ( *ss=='#' ) continue; char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; char x = *se; *se = 0; int ismpl = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, ss); @@ -99,10 +99,10 @@ void set_samples(char *fname, bcf_hdr_t *hdr, ploidy_t *ploidy, int *sample2sex) *se = x; ss = se+1; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) error("Could not parse: %s\n", tmp.s); se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( se==ss ) error("Could not parse: %s\n", tmp.s); sample2sex[ismpl] = ploidy_add_sex(ploidy, ss); diff --git a/plugins/fixref.c b/plugins/fixref.c index abb3367ea..b38ebbf12 100644 --- a/plugins/fixref.c +++ b/plugins/fixref.c @@ -278,7 +278,7 @@ static bcf1_t *set_ref_alt(args_t *args, bcf1_t *rec, const char ref, const char static inline int nt2int(char nt) { - nt = toupper(nt); + nt = toupper_c(nt); if ( nt=='A' ) return 0; if ( nt=='C' ) return 1; if ( nt=='G' ) return 2; diff --git a/plugins/gvcfz.c b/plugins/gvcfz.c index 91d43f159..4fcc24e21 100644 --- a/plugins/gvcfz.c +++ b/plugins/gvcfz.c @@ -130,7 +130,7 @@ static void init_groups(args_t *args) char *rmme_str = strdup(args->group_by), *beg = rmme_str; while ( *beg ) { - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !beg ) break; char *end = beg; while ( *end && *end!=':' ) end++; @@ -155,8 +155,8 @@ static void init_groups(args_t *args) if ( !strcmp(flt,"PASS") ) grp->flt_id = -1; // remove trailing spaces - beg = grp->expr + strlen(grp->expr); while ( beg >= grp->expr && isspace(*beg) ) { *beg = 0; beg--; } - beg = grp->expr; while ( *beg && isspace(*beg) ) beg++; + beg = grp->expr + strlen(grp->expr); while ( beg >= grp->expr && isspace_c(*beg) ) { *beg = 0; beg--; } + beg = grp->expr; while ( *beg && isspace_c(*beg) ) beg++; grp->flt = strcmp("-",beg) ? filter_init(args->hdr_in, grp->expr) : NULL; diff --git a/plugins/mendelian2.c b/plugins/mendelian2.c index 837b97da2..0347cd0e2 100644 --- a/plugins/mendelian2.c +++ b/plugins/mendelian2.c @@ -235,12 +235,12 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ // eat any leading spaces char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip empty lines // sex id, e.g. 1X or 2X char keep, *tmp, *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the sex ID in the region line: %s\n", line); keep = *se; *se = 0; @@ -252,13 +252,13 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ sex_id = args->nsex_id++; } *se = keep; - while ( *se && isdigit(*se) ) se++; - while ( *se && isspace(*se) ) se++; + while ( *se && isdigit_c(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; // chromosome name, beg, end - while ( se[1] && !isspace(se[1]) ) se++; - while ( se > ss && isdigit(*se) ) se--; + while ( se[1] && !isspace_c(se[1]) ) se++; + while ( se > ss && isdigit_c(*se) ) se--; if ( *se!='-' ) error("Could not parse the region: %s\n",line); *end = strtol(se+1, &tmp, 10) - 1; if ( tmp==se+1 ) error("Could not parse the region: %s\n",line); @@ -270,8 +270,8 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ *chr_end = se-1; // skip region - while ( *ss && !isspace(*ss) ) ss++; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && !isspace_c(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; rule_t *rule = (rule_t*) payload; rule->sex_id = sex_id; @@ -279,7 +279,7 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ rule->ploidy = 0; // alleles inherited from mother (M), father (F), both (MF), none (.) - while ( *ss && !isspace(*ss) ) + while ( *ss && !isspace_c(*ss) ) { if ( *ss=='M' ) { rule->inherits |= 1<ploidy++; } else if ( *ss=='F' ) { rule->inherits |= 1<ploidy++; } diff --git a/plugins/scatter.c b/plugins/scatter.c index ec68e56c9..2afd0e04c 100644 --- a/plugins/scatter.c +++ b/plugins/scatter.c @@ -117,12 +117,12 @@ void mkdir_p(const char *fmt, ...) HTS_FORMAT(HTS_PRINTF_FMT, 1, 2); int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && *se!=':' && !isspace(*se) ) se++; + while ( *se && *se!=':' && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -140,9 +140,9 @@ int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint if ( *beg==0 ) { fprintf(stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; } (*beg)--; - if ( !se[0] || isspace(se[0])) { + if ( !se[0] || isspace_c(se[0])) { *end = *beg; - } else if ( se[0] == '-' && (!se[1] || isspace(se[1])) ) { + } else if ( se[0] == '-' && (!se[1] || isspace_c(se[1])) ) { *end = MAX_COOR_0; se++; } else { @@ -155,7 +155,7 @@ int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint } ss = se; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !ss[0] ) ss = (char *)line; int *idx = (int *)payload; @@ -180,7 +180,7 @@ static void open_set(subset_t *set, args_t *args) int k, l = args->str.l; if (args->prefix) kputs(args->prefix, &args->str); kputs(set->fname, &args->str); - for (k=l; kstr.l; k++) if ( isspace(args->str.s[k]) ) args->str.s[k] = '_'; + for (k=l; kstr.l; k++) if ( isspace_c(args->str.s[k]) ) args->str.s[k] = '_'; if ( args->output_type & FT_BCF ) kputs(".bcf", &args->str); else if ( args->output_type & FT_GZ ) kputs(".vcf.gz", &args->str); else kputs(".vcf", &args->str); diff --git a/plugins/setGT.c b/plugins/setGT.c index 6495006c3..7340f7f74 100644 --- a/plugins/setGT.c +++ b/plugins/setGT.c @@ -162,12 +162,12 @@ void parse_binom_expr(args_t *args, char *str) if ( str[1]!=':' ) _parse_binom_expr_error(str); char *beg = str+2; - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) _parse_binom_expr_error(str); char *end = beg; while ( *end ) { - if ( isspace(*end) || *end=='<' || *end=='=' || *end=='>' ) break; + if ( isspace_c(*end) || *end=='<' || *end=='=' || *end=='>' ) break; end++; } if ( !*end ) _parse_binom_expr_error(str); @@ -176,7 +176,7 @@ void parse_binom_expr(args_t *args, char *str) int tag_id = bcf_hdr_id2int(args->in_hdr,BCF_DT_ID,args->binom_tag); if ( !bcf_hdr_idinfo_exists(args->in_hdr,BCF_HL_FMT,tag_id) ) error("The FORMAT tag \"%s\" is not present in the VCF\n", args->binom_tag); - while ( *end && isspace(*end) ) end++; + while ( *end && isspace_c(*end) ) end++; if ( !*end ) _parse_binom_expr_error(str); if ( !strncmp(end,"<=",2) ) { args->binom_cmp = cmp_le; beg = end+2; } @@ -187,11 +187,11 @@ void parse_binom_expr(args_t *args, char *str) else if ( !strncmp(end,"=",1) ) { args->binom_cmp = cmp_eq; beg = end+1; } else _parse_binom_expr_error(str); - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) _parse_binom_expr_error(str); args->binom_val = strtod(beg, &end); - while ( *end && isspace(*end) ) end++; + while ( *end && isspace_c(*end) ) end++; if ( *end ) _parse_binom_expr_error(str); args->tgt_mask |= GT_BINOM; diff --git a/plugins/split-vep.c b/plugins/split-vep.c index 1a34bef4e..d0ac6433d 100644 --- a/plugins/split-vep.c +++ b/plugins/split-vep.c @@ -341,7 +341,7 @@ static void expand_csq_expression(args_t *args, kstring_t *str) char *ptr = strstr(args->format_str,str->s); if ( !ptr ) return; char *end = ptr + str->l, tmp = *end; - if ( isalnum(tmp) || tmp=='_' || tmp=='.' ) return; + if ( isalnum_c(tmp) || tmp=='_' || tmp=='.' ) return; *end = 0; str->l = 0; @@ -396,11 +396,11 @@ static void init_column2type(args_t *args) tmp.l = 0; kputc('^',&tmp); char *ptr = type[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Error: failed to parse the column type \"%s\"\n",type[i]); kputsn(type[i],ptr-type[i],&tmp); kputc('$',&tmp); - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Error: failed to parse the column type \"%s\"\n",type[i]); args->ncolumn2type++; args->column2type = (col2type_t*) realloc(args->column2type,sizeof(*args->column2type)*args->ncolumn2type); @@ -463,7 +463,7 @@ static int query_has_field(char *fmt, char *field, kstring_t *str) ptr = strstr(ptr,str->s); if ( !ptr ) return 0; end = ptr[str->l]; - if ( isalnum(end) || end=='_' || end=='.' ) + if ( isalnum_c(end) || end=='_' || end=='.' ) { ptr++; continue; @@ -948,13 +948,13 @@ static void init_data(args_t *args) { if ( *ep=='#' ) { - while ( *ep && *ep!='\n' ) { *ep = tolower(*ep); ep++; } + while ( *ep && *ep!='\n' ) { *ep = tolower_c(*ep); ep++; } if ( !*ep ) break; ep++; continue; } char *bp = ep; - while ( *ep && !isspace(*ep) ) { *ep = tolower(*ep); ep++; } + while ( *ep && !isspace_c(*ep) ) { *ep = tolower_c(*ep); ep++; } char tmp = *ep; *ep = 0; args->nscale++; @@ -965,7 +965,7 @@ static void init_data(args_t *args) if ( !tmp ) break; if ( tmp=='\n' ) severity++; ep++; - while ( *ep && isspace(*ep) ) ep++; + while ( *ep && isspace_c(*ep) ) ep++; } // Transcript and consequence selection @@ -1098,7 +1098,7 @@ static void csq_to_severity(args_t *args, char *csq, int *min_severity, int *max while ( *ep ) { char *bp = ep; - while ( *ep && *ep!='&' ) { *ep = tolower(*ep); ep++; } + while ( *ep && *ep!='&' ) { *ep = tolower_c(*ep); ep++; } char tmp = *ep; *ep = 0; diff --git a/plugins/split.c b/plugins/split.c index 93d04a971..bbab6a666 100644 --- a/plugins/split.c +++ b/plugins/split.c @@ -203,7 +203,7 @@ void init_subsets(args_t *args) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; if ( *ptr==',' ) set->nsmpl++; // todo: allow commas in sample names kputc(*ptr, &str); escaped = 0; @@ -228,16 +228,16 @@ void init_subsets(args_t *args) } if ( !j ) continue; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; j = 0; if ( *ptr ) // optional second column with new sample names { set->rename = (char**) calloc(set->nsmpl, sizeof(*set->rename)); beg = ptr; - while ( *beg && !isspace(*beg) ) + while ( *beg && !isspace_c(*beg) ) { ptr = beg; - while ( *ptr && *ptr!=',' && !isspace(*ptr) ) ptr++; + while ( *ptr && *ptr!=',' && !isspace_c(*ptr) ) ptr++; char tmp = *ptr; *ptr = 0; if ( !strcmp("-",beg) ) @@ -248,7 +248,7 @@ void init_subsets(args_t *args) } set->rename[j++] = strdup(beg); *ptr = tmp; - if ( !tmp || isspace(tmp) ) break; + if ( !tmp || isspace_c(tmp) ) break; beg = ptr + 1; if ( j >= set->nsmpl ) error("Expected the same number of samples in the first and second column: %s\n",files[i]); @@ -262,7 +262,7 @@ void init_subsets(args_t *args) } } - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( *ptr ) // optional third column with file name { free(set->fname); @@ -293,7 +293,7 @@ void init_subsets(args_t *args) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; escaped = 0; ptr++; } @@ -311,18 +311,18 @@ void init_subsets(args_t *args) if ( tmp ) // two columns: new sample name { rename = ptr + 1; - while ( *rename && isspace(*rename) ) rename++; + while ( *rename && isspace_c(*rename) ) rename++; if ( !*rename ) rename = NULL; // trailing space else { ptr = rename; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; tmp = *ptr; *ptr = 0; if ( !strcmp("-",rename) ) rename = NULL; if ( tmp ) ptr++; } - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; } if ( !*ptr ) // no third column, use sample name as file name diff --git a/plugins/vrfs.c b/plugins/vrfs.c index e851003fa..eaddbdaaa 100644 --- a/plugins/vrfs.c +++ b/plugins/vrfs.c @@ -161,12 +161,12 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ // CHR part char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -180,9 +180,9 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ (*beg)--; // REF part and REF length - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int ref_len = se - ss; if ( !ref_len ) error("Could not parse the REF part of the line: %s\n",line); *end = *beg; // we are interested in overlaps at the POS only, not variant length @@ -195,9 +195,9 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ site->dist = calloc(args->profile.nbins,sizeof(*site->dist)); // ALT part - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int alt_len = se - ss; if ( !alt_len ) error("Could not parse the ALT part of the line: %s\n",line); site->alt = malloc(alt_len+1); @@ -738,7 +738,7 @@ static double *parse_float_array(const char *line, int *narray) const char *ptr = line; while ( *ptr ) { - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; n++; if ( *ptr ) ptr++; } @@ -748,7 +748,7 @@ static double *parse_float_array(const char *line, int *narray) { char *tmp; array[i] = strtod(ptr,&tmp); - if ( *tmp && !isspace(*tmp) ) error("Could not parse the float array: %s\n",line); + if ( *tmp && !isspace_c(*tmp) ) error("Could not parse the float array: %s\n",line); ptr = tmp+1; } *narray = n; @@ -771,11 +771,11 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // CHR part char *ss = (char*) line + 5; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -2; // unexpected format char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -790,7 +790,7 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // REF part ss = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int ref_len = se - ss; *end = *beg; @@ -802,18 +802,18 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // ALT part ss = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int alt_len = se - ss; site->alt = malloc(alt_len+1); strncpy(site->alt,ss,alt_len); site->alt[alt_len] = 0; // skip the SCORE part - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the SCORE part of the line: %s\n",line); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the SCORE part of the line: %s\n",line); // read the PROFILE part diff --git a/regidx.c b/regidx.c index 445d7d585..40d909896 100644 --- a/regidx.c +++ b/regidx.c @@ -29,6 +29,9 @@ #include #include "regidx.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + #define MAX_COOR_0 REGIDX_MAX // CSI and hts_itr_query limit, 0-based #define iBIN(x) ((x)>>13) @@ -211,13 +214,13 @@ regidx_t *regidx_init_string(const char *str, regidx_parse_f parser, regidx_free const char *ss = str; while ( *ss ) { - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; const char *se = ss; while ( *se && *se!='\r' && *se!='\n' ) se++; tmp.l = 0; kputsn(ss, se-ss, &tmp); regidx_insert(idx,tmp.s); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; } free(tmp.s); @@ -476,12 +479,12 @@ int regidx_overlap(regidx_t *regidx, const char *chr, uint32_t beg, uint32_t end int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -508,12 +511,12 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -538,7 +541,7 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t { ss = se+1; *end = strtod(ss, &se); - if ( ss==se || (*se && !isspace(*se)) ) *end = *beg; + if ( ss==se || (*se && !isspace_c(*se)) ) *end = *beg; else if ( *end==0 ) { fprintf(stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; } else (*end)--; } @@ -555,7 +558,7 @@ int regidx_parse_vcf(const char *line, char **chr_beg, char **chr_end, uint32_t int regidx_parse_reg(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments diff --git a/reheader.c b/reheader.c index df631aae3..bb94e7a22 100644 --- a/reheader.c +++ b/reheader.c @@ -78,10 +78,10 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see p = ++q; while ( *q && (*q==' ' || *q=='\t') ) { p++; q++; } // ^[A-Za-z_][0-9A-Za-z_.]*$ - if (p==q && *q && (isalpha(*q) || *q=='_')) + if (p==q && *q && (isalpha_c(*q) || *q=='_')) { q++; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; } int n = q-p; int m = 0; @@ -228,7 +228,7 @@ static void read_header_file(char *fname, kstring_t *hdr) if ( hts_close(fp) ) error("Close failed: %s\n", fname); free(tmp.s); - while ( hdr->l>0 && isspace(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines + while ( hdr->l>0 && isspace_c(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines kputc('\n',hdr); } @@ -248,17 +248,17 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &key); escaped = 0; ptr++; } if ( !*ptr ) break; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &val); escaped = 0; ptr++; @@ -273,7 +273,7 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id return 0; } - while ( hdr->l>0 && isspace(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines + while ( hdr->l>0 && isspace_c(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines hdr->s[hdr->l] = 0; kstring_t tmp = {0,0,0}; diff --git a/smpl_ilist.c b/smpl_ilist.c index 4bc4cec2e..7a0d5e275 100644 --- a/smpl_ilist.c +++ b/smpl_ilist.c @@ -40,7 +40,7 @@ void smpl_ilist_destroy(smpl_ilist_t *smpl) static inline int is_space_or_escaped(const char *min, const char *str) { - if ( !isspace(*str) ) return 0; + if ( !isspace_c(*str) ) return 0; int n = 0; while ( --str>=min && *str=='\\' ) n++; return n%2 ? 0 : 1; diff --git a/str_finder.c b/str_finder.c index a9281d811..cf9e4bdf9 100644 --- a/str_finder.c +++ b/str_finder.c @@ -94,7 +94,7 @@ static void add_rep(rep_ele **list, char *cons, int clen, int pos, int rlen, if (lower_only) { int lc = 0; for (i = el->start; i <= el->end; i++) { - if (islower(cons[i])) { + if (islower((unsigned char) cons[i])) { lc = 1; break; } diff --git a/test/test-regidx.c b/test/test-regidx.c index eed36ab74..8ec261d61 100644 --- a/test/test-regidx.c +++ b/test/test-regidx.c @@ -37,6 +37,9 @@ #include #include "regidx.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + static int verbose = 0; void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) @@ -75,18 +78,18 @@ int custom_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg // Skip the fields that were parsed above char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; for (i=0; i<3; i++) { - while ( *ss && !isspace(*ss) ) ss++; + while ( *ss && !isspace_c(*ss) ) ss++; if ( !*ss ) return -2; // wrong number of fields - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; } if ( !*ss ) return -2; // Parse the payload char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; char **dat = (char**) payload; *dat = (char*) malloc(se-ss+1); memcpy(*dat,ss,se-ss+1); diff --git a/tsv2vcf.c b/tsv2vcf.c index 22dec3065..89b972088 100644 --- a/tsv2vcf.c +++ b/tsv2vcf.c @@ -79,14 +79,14 @@ int tsv_parse(tsv_t *tsv, bcf1_t *rec, char *str) tsv->ss = tsv->se = str; while ( *tsv->ss && tsv->icol < tsv->ncols ) { - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; if ( tsv->cols[tsv->icol].setter ) { int ret = tsv->cols[tsv->icol].setter(tsv,rec,tsv->cols[tsv->icol].usr); if ( ret<0 ) return -1; status++; } - while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; tsv->icol++; } @@ -123,7 +123,7 @@ int tsv_setter_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) { bcf_hdr_t *hdr = (bcf_hdr_t*)usr; char *sb = tsv->ss; - while ( *sb && !isspace(*sb) ) sb++; + while ( *sb && !isspace_c(*sb) ) sb++; if ( !*sb ) return -1; char tmp = *sb; *sb = ','; diff --git a/tsv2vcf.h b/tsv2vcf.h index 68757d459..53964adcf 100644 --- a/tsv2vcf.h +++ b/tsv2vcf.h @@ -27,6 +27,7 @@ #define __TSV2VCF_H__ #include +#include "bcftools.h" typedef struct _tsv_t tsv_t; typedef int (*tsv_setter_t)(tsv_t *, bcf1_t *, void *); @@ -65,12 +66,12 @@ static inline int tsv_next(tsv_t *tsv) if ( !*tsv->se ) return -1; if ( tsv->ss==tsv->se ) { - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; return 0; } - while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; return 0; } diff --git a/vcfannotate.c b/vcfannotate.c index 7e19fe0f1..47e70d82f 100644 --- a/vcfannotate.c +++ b/vcfannotate.c @@ -2229,12 +2229,12 @@ static void init_columns(args_t *args) for (i=0; imerge_method_str.l ) kputc(',',&args->merge_method_str); @@ -2834,7 +2834,7 @@ static void rename_chrs(args_t *args, char *fname) for (i=0; ihdr_out, map[i]); @@ -2844,9 +2844,9 @@ static void rename_chrs(args_t *args, char *fname) assert( j>=0 ); free(hrec->vals[j]); ss++; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *se = 0; hrec->vals[j] = strdup(ss); args->hdr_out->id[BCF_DT_CTG][rid].key = hrec->vals[j]; @@ -2891,7 +2891,7 @@ static int rename_annots_core(args_t *args, char *ori_tag, char *new_tag) assert( j>=0 ); free(hrec->vals[j]); char *ptr = new_tag; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; *ptr = 0; hrec->vals[j] = strdup(new_tag); args->hdr_out->id[BCF_DT_ID][id].key = hrec->vals[j]; @@ -2908,12 +2908,12 @@ static void rename_annots(args_t *args) for (i=0; irename_annots_nmap; i++) { char *ptr = args->rename_annots_map[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse: %s\n", args->rename_annots_map[i]); char *rmme = ptr; *ptr = 0; ptr++; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) { *rmme = ' '; error("Could not parse: %s\n", args->rename_annots_map[i]); } if ( rename_annots_core(args, args->rename_annots_map[i], ptr) < 0 ) error("Cannot rename \"%s\" to \"%s\"\n",args->rename_annots_map[i],ptr); diff --git a/vcfcall.c b/vcfcall.c index 7ea143666..063332b40 100644 --- a/vcfcall.c +++ b/vcfcall.c @@ -218,11 +218,11 @@ static char **parse_ped_samples(args_t *args, call_t *call, char **vals, int nva j = 0; while ( *tmp && j<5 ) { - if ( isspace(*tmp) ) + if ( isspace_c(*tmp) ) { *tmp = 0; ++tmp; - while ( isspace(*tmp) ) tmp++; // allow multiple spaces + while ( isspace_c(*tmp) ) tmp++; // allow multiple spaces col_ends[j] = tmp-1; j++; continue; @@ -312,11 +312,11 @@ static void set_samples(args_t *args, const char *fn, int is_file) for (i=0; iaux.hdr, BCF_DT_SAMPLE, ss); @@ -324,10 +324,10 @@ static void set_samples(args_t *args, const char *fn, int is_file) if ( old2new[ismpl] != -1 ) { fprintf(stderr,"Warning: The sample is listed multiple times: %s\n",ss); continue; } ss = se+(x != '\0'); - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) ss = "2"; // default ploidy se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( se==ss ) { *xptr = x; error("Could not parse: \"%s\"\n", lines[i]); } char *sex = ss; @@ -354,11 +354,11 @@ static void set_samples(args_t *args, const char *fn, int is_file) for (i=0; iaux.hdr, BCF_DT_SAMPLE, ss); @@ -416,12 +416,12 @@ static void init_missed_line(args_t *args) static int tgt_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) { fprintf(stderr,"Could not parse the line: %s\n", line); return -2; } if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -445,14 +445,14 @@ static int tgt_parse(const char *line, char **chr_beg, char **chr_end, uint32_t while ( *ss ) { se = ss; - while ( *se && *se!=',' && !isspace(*se) ) se++; + while ( *se && *se!=',' && !isspace_c(*se) ) se++; als->n++; als->allele = (char**)realloc(als->allele,als->n*sizeof(*als->allele)); als->allele[als->n-1] = (char*)malloc(se-ss+1); memcpy(als->allele[als->n-1],ss,se-ss); als->allele[als->n-1][se-ss] = 0; ss = se+1; - if ( !*se || isspace(*se) ) break; + if ( !*se || isspace_c(*se) ) break; } if ( als->n<2 ) error("Unable to parse the -T file; expected CHROM\\tPOS\\tREF,ALT with -C alleles but found instead:\n\t%s\n",line); return 0; diff --git a/vcfconvert.c b/vcfconvert.c index b01742ae6..db09166a5 100644 --- a/vcfconvert.c +++ b/vcfconvert.c @@ -201,8 +201,8 @@ static int _set_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) if ( *se!='_' ) return -1; kputsn(ss,se-ss,&args->str); ss = ++se; - while ( se < tsv->se && *se!='_' && isspace(*tsv->se) ) se++; - if ( se < tsv->se && *se!='_' && isspace(*tsv->se) ) return -1; + while ( se < tsv->se && *se!='_' && isspace_c(*tsv->se) ) se++; + if ( se < tsv->se && *se!='_' && isspace_c(*tsv->se) ) return -1; kputc(',',&args->str); kputsn(ss,se-ss,&args->str); @@ -275,9 +275,9 @@ static int tsv_setter_verify_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) args->rev_als = 1; } *tsv->se = tmp; - while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; tmp = *tsv->se; *tsv->se = 0; if ( !args->rev_als && strcmp(tsv->ss,rec->d.allele[1]) ) { *tsv->se = tmp; error("REF/ALT mismatch: [%s][%s]\n", tsv->ss,rec->d.allele[1]); } else if ( args->rev_als && strcmp(tsv->ss,rec->d.allele[0]) ) { *tsv->se = tmp; error("REF/ALT mismatch: [%s][%s]\n", tsv->ss,rec->d.allele[0]); } @@ -436,7 +436,7 @@ static void gensample_to_vcf(args_t *args) // of the columns (CHROM:POS_REF_ALT comes first or second) args->str.l = 0; char *sb = line.s, *se = line.s; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", gen_fname,line.s); if ( args->gen_3N6 ) // first column, just CHROM kputsn(sb, se-sb, &args->str); @@ -445,7 +445,7 @@ static void gensample_to_vcf(args_t *args) char *sc = strchr(sb,':'); if ( !sc || sc > se ) { - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", gen_fname,line.s); sb = ++se; sc = strchr(sb,':'); @@ -482,7 +482,7 @@ static void gensample_to_vcf(args_t *args) if ( !samples ) error("Could not read %s\n", sample_fname); for (i=2; iheader,samples[i]); } @@ -627,7 +627,7 @@ static void haplegendsample_to_vcf(args_t *args) // returned from hts_readlist (i=1, and not i=0) for (i=1; iheader,samples[i]); } @@ -736,13 +736,13 @@ static void hapsample_to_vcf(args_t *args) // Find out the chromosome name, it can be either in the first or second column args->str.l = 0; char *sb = line.s, *se = line.s; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", hap_fname,line.s); if ( !args->output_vcf_ids ) { // first column should be just CHROM, but the second must be CHROM:POS_REF_ALT, use that sb = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", hap_fname,line.s); if ( !strchr(sb,':') ) error("Could not determine CHROM in the second column of %s: %s\n", hap_fname,line.s); @@ -781,7 +781,7 @@ static void hapsample_to_vcf(args_t *args) if ( !samples ) error("Could not read %s\n", sample_fname); for (i=2; iheader,samples[i]); } @@ -847,13 +847,13 @@ char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname) if ( !lines ) error("Could not read %s\n", sex_fname); for (i=0; iheader,rec->rid),(int64_t) rec->pos+1); int nals = 1, alleles[5] = { -1, -1, -1, -1, -1 }; // a,c,g,t,n - ref[0] = toupper(ref[0]); + ref[0] = toupper_c(ref[0]); int iref = acgt_to_5(ref[0]); alleles[iref] = 0; diff --git a/vcfgtcheck.c b/vcfgtcheck.c index d4999f1d4..1ce426ba0 100644 --- a/vcfgtcheck.c +++ b/vcfgtcheck.c @@ -359,13 +359,13 @@ static void init_data(args_t *args) for (i=0; inpairs; i++) { char *ptr = tmp[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse %s: %s\n",args->pair_samples,tmp[i]); *ptr = 0; args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[i]); if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[i]); ptr++; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; args->pairs[i].igt = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, ptr); if ( args->pairs[i].igt < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,ptr); free(tmp[i]); diff --git a/vcfisec.c b/vcfisec.c index 51750c170..2b2691a29 100644 --- a/vcfisec.c +++ b/vcfisec.c @@ -630,7 +630,7 @@ int main_vcfisec(int argc, char *argv[]) else if ( *p=='+' ) { args->isec_op = OP_PLUS; p++; } else if ( *p=='=' ) { args->isec_op = OP_EQUAL; p++; } else if ( *p=='~' ) { args->isec_op = OP_EXACT; p++; } - else if ( isdigit(*p) ) args->isec_op = OP_EQUAL; + else if ( isdigit_c(*p) ) args->isec_op = OP_EQUAL; else error("Could not parse --nfiles %s\n", optarg); if ( args->isec_op == OP_EXACT ) args->isec_exact = p; else if ( sscanf(p,"%d",&args->isec_n)!=1 ) error("Could not parse --nfiles %s\n", optarg); diff --git a/vcfmerge.c b/vcfmerge.c index ce6a71c9c..9f1a8e9ea 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -793,12 +793,12 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb) for (i=0; in_allele; i++) { - if ( toupper(als[0].s[ als[0].l-1 ]) != toupper(als[i].s[ als[i].l-1 ]) ) break; + if ( toupper_c(als[0].s[ als[0].l-1 ]) != toupper_c(als[i].s[ als[i].l-1 ]) ) break; if ( als[i].l < min_len ) min_len = als[i].l; } if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed @@ -490,7 +490,7 @@ static hts_pos_t realign_left(args_t *args, bcf1_t *line) int min_len = als[0].l - ntrim_left; for (i=1; in_allele; i++) { - if ( toupper(als[0].s[ntrim_left]) != toupper(als[i].s[ntrim_left]) ) break; + if ( toupper_c(als[0].s[ntrim_left]) != toupper_c(als[i].s[ntrim_left]) ) break; if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left; } if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed @@ -524,7 +524,7 @@ static hts_pos_t realign_right(args_t *args, bcf1_t *line) for (i=1; in_allele; i++) { if ( als[0].l!=als[i].l ) has_indel = 1; - if ( toupper(als[0].s[ntrim_left]) != toupper(als[i].s[ntrim_left]) ) break; + if ( toupper_c(als[0].s[ntrim_left]) != toupper_c(als[i].s[ntrim_left]) ) break; if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left; } if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed further @@ -559,7 +559,7 @@ static hts_pos_t realign_right(args_t *args, bcf1_t *line) int min_len = als[0].l; for (i=1; in_allele; i++) { - if ( toupper(als[0].s[ als[0].l-1 ]) != toupper(als[i].s[ als[i].l-1 ]) ) break; + if ( toupper_c(als[0].s[ als[0].l-1 ]) != toupper_c(als[i].s[ als[i].l-1 ]) ) break; if ( min_len > als[i].l ) min_len = als[i].l; } if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed more diff --git a/vcfroh.c b/vcfroh.c index 1b3eff91f..d4da445c1 100644 --- a/vcfroh.c +++ b/vcfroh.c @@ -408,7 +408,7 @@ static int load_genmap(args_t *args, const char *chr) // skip second column tmp++; - while ( *tmp && !isspace(*tmp) ) tmp++; + while ( *tmp && !isspace_c(*tmp) ) tmp++; // read the genetic map in cM, scale from % to likelihood gm->rate = strtod(tmp+1, &end); @@ -677,9 +677,9 @@ int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq) str++; } *alt_freq = strtod(str, &tmp); - if ( *tmp && !isspace(*tmp) ) + if ( *tmp && !isspace_c(*tmp) ) { - if ( str[0]=='.' && (!str[1] || isspace(str[1])) ) return -1; // missing value + if ( str[0]=='.' && (!str[1] || isspace_c(str[1])) ) return -1; // missing value error("Could not parse: [%s]\n", tgt->line.s); } if ( *alt_freq<0 || *alt_freq>1 ) error("Could not parse AF: [%s]\n", tgt->line.s); diff --git a/vcmp.c b/vcmp.c index dbdc4b7ac..c5f9ec84a 100644 --- a/vcmp.c +++ b/vcmp.c @@ -30,6 +30,9 @@ THE SOFTWARE. */ #include #include "vcmp.h" +// Avoid having to include all of bcftools.h +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + struct _vcmp_t { char *dref; @@ -57,7 +60,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) vcmp->ndref = 0; char *a = ref1, *b = ref2; - while ( *a && *b && toupper(*a)==toupper(*b) ) { a++; b++; } + while ( *a && *b && toupper_c(*a)==toupper_c(*b) ) { a++; b++; } if ( !*a && !*b ) return 0; if ( *a && *b ) return -1; // refs not compatible @@ -68,7 +71,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) while ( *a ) a++; vcmp->ndref = (a-ref1) - vcmp->nmatch; hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); - for (i=0; indref; i++) vcmp->dref[i] = toupper(ref1[vcmp->nmatch+i]); + for (i=0; indref; i++) vcmp->dref[i] = toupper_c(ref1[vcmp->nmatch+i]); vcmp->dref[vcmp->ndref] = 0; return 0; } @@ -78,7 +81,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) while ( *b ) b++; vcmp->ndref = (b-ref2) - vcmp->nmatch; hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); - for (i=0; indref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]); + for (i=0; indref; i++) vcmp->dref[i] = toupper_c(ref2[vcmp->nmatch+i]); vcmp->dref[vcmp->ndref] = 0; vcmp->ndref *= -1; return 0; @@ -90,7 +93,7 @@ int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2) for (i=0; indref ) { @@ -103,14 +106,14 @@ int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2) { if ( vcmp->ndref<0 ) continue; for (j=0; jndref; j++) - if ( !a[j] || toupper(a[j])!=vcmp->dref[j] ) break; + if ( !a[j] || toupper_c(a[j])!=vcmp->dref[j] ) break; if ( j!=vcmp->ndref || a[j] ) continue; break; // found } if ( vcmp->ndref>0 ) continue; for (j=0; j<-vcmp->ndref; j++) - if ( !b[j] || toupper(b[j])!=vcmp->dref[j] ) break; + if ( !b[j] || toupper_c(b[j])!=vcmp->dref[j] ) break; if ( j!=-vcmp->ndref || b[j] ) continue; break; // found }