From c67520327f1bcbd9b96105b31cff0c6209071f2e Mon Sep 17 00:00:00 2001 From: Rob Davies Date: Wed, 11 Feb 2026 19:50:28 +0000 Subject: [PATCH] Wrap ctype.h functions to avoid array subscript warnings Copy HTSlib's internal wrappers for interfaces in <ctype.h> into bcftools.h (as it's widely included already) and use them on cases where the input has `char` type. For most sources that don't currently include bcftools.h, the required inline function is copied into the top to avoid having to spread bcftools.h dependency to them (in all cases only one function is needed). Fixes "array subscript has type 'char'" warnings on platforms that still implement <ctype.h> interfaces as a macro around an array look-up. ctype interface wrappers were originally authored by John Marshall in HTSlib commit fc9aeb6f7 --- Makefile | 2 +- abuf.c | 2 +- bam2bcf_indel.c | 7 +++++-- bam_sample.c | 16 ++++++++-------- bcftools.h | 17 +++++++++++++++++ consensus.c | 33 ++++++++++++++------------------- convert.c | 8 ++++---- csq.c | 2 +- filter.c | 20 ++++++++++---------- mcall.c | 7 +++++-- mpileup.c | 4 ++-- mpileup2/mpileup.c | 2 +- ploidy.c | 18 +++++++++--------- plugins/fill-tags.c | 6 +++--- plugins/fixploidy.c | 8 ++++---- plugins/fixref.c | 2 +- plugins/gvcfz.c | 6 +++--- plugins/mendelian2.c | 18 +++++++++--------- plugins/scatter.c | 12 ++++++------ plugins/setGT.c | 10 +++++----- plugins/split-vep.c | 16 ++++++++-------- plugins/split.c | 20 ++++++++++---------- plugins/vrfs.c | 30 +++++++++++++++--------------- regidx.c | 19 +++++++++++-------- reheader.c | 14 +++++++------- smpl_ilist.c | 2 +- str_finder.c | 2 +- test/test-regidx.c | 11 +++++++---- tsv2vcf.c | 6 +++--- tsv2vcf.h | 7 ++++--- vcfannotate.c | 16 ++++++++-------- vcfcall.c | 24 ++++++++++++------------ vcfconvert.c | 32 ++++++++++++++++---------------- vcfgtcheck.c | 4 ++-- vcfisec.c | 2 +- vcfmerge.c | 4 ++-- vcfnorm.c | 12 ++++++------ vcfroh.c | 6 +++--- vcmp.c | 15 +++++++++------ 39 files changed, 235 insertions(+), 207 
deletions(-) diff --git a/Makefile b/Makefile index 09199aa1c..a158a126a 100644 --- a/Makefile +++ b/Makefile @@ -231,7 +231,7 @@ bcftools_h = bcftools.h $(htslib_hts_defs_h) $(htslib_vcf_h) $(htslib_synced_bcf call_h = call.h $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) vcmp.h variantkey_h = variantkey.h hex.h convert_h = convert.h $(htslib_vcf_h) -tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h) +tsv2vcf_h = tsv2vcf.h $(htslib_vcf_h) $(bcftools_h) filter_h = filter.h $(htslib_vcf_h) gvcf_h = gvcf.h $(bcftools_h) khash_str2str_h = khash_str2str.h $(htslib_khash_h) diff --git a/abuf.c b/abuf.c index b125679b9..98f133f71 100644 --- a/abuf.c +++ b/abuf.c @@ -721,7 +721,7 @@ static inline int _is_acgtn(char *seq) { while ( *seq ) { - char c = toupper(*seq); + char c = toupper_c(*seq); if ( c!='A' && c!='C' && c!='G' && c!='T' && c!='N' ) return 0; seq++; } diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 975504f8a..b48509a24 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -36,6 +36,9 @@ DEALINGS IN THE SOFTWARE. */ #include KSORT_INIT_GENERIC(uint32_t) +// Avoid having to include all of bcftools.h +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + #define MINUS_CONST 0x10000000 #define MAX_TYPES 64 @@ -89,8 +92,8 @@ inline int est_indelreg(int pos, const char *ref, int l, char *ins4) int i, j, max = 0, max_i = pos, score = 0; l = abs(l); for (i = pos + 1, j = 0; ref[i]; ++i, ++j) { - if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; - else score += (toupper(ref[i]) != toupper(ref[pos+1+j%l]))? -10 : 1; + if (ins4) score += (toupper_c(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1; + else score += (toupper_c(ref[i]) != toupper_c(ref[pos+1+j%l]))? 
-10 : 1; if (score < 0) break; if (max < score) max = score, max_i = i; } diff --git a/bam_sample.c b/bam_sample.c index d8c10b8b3..aa1499868 100644 --- a/bam_sample.c +++ b/bam_sample.c @@ -295,18 +295,18 @@ int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &ori); escaped = 0; ptr++; } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &ren); escaped = 0; ptr++; @@ -343,18 +343,18 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld1); escaped = 0; ptr++; } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld2); escaped = 0; ptr++; @@ -362,11 +362,11 @@ int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file) } if ( *ptr ) { - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &fld3); escaped = 0; ptr++; diff --git a/bcftools.h b/bcftools.h index 5a4071d96..a06ce60d0 100644 --- a/bcftools.h +++ b/bcftools.h @@ -31,6 +31,7 @@ THE SOFTWARE. 
*/ #include #include #include +#include <ctype.h> #define FT_TAB_TEXT 0 // custom tab-delimited text file #define FT_GZ 1 @@ -184,4 +185,20 @@ static inline int get_unseen_allele(bcf1_t *line) return 0; } +// <ctype.h> wrappers, borrowed from htslib's textutils_internal.h +// The <ctype.h> functions operate on ints such as are returned by fgetc(), +// i.e., characters represented as unsigned-char-valued ints, or EOF. +// To operate on plain chars (and to avoid warnings on some platforms), +// technically one must cast to unsigned char everywhere (see CERT STR37-C) +// or less painfully use these *_c() functions that operate on plain chars +// (but not EOF, which must be considered separately where it is applicable). +static inline int isalnum_c(char c) { return isalnum((unsigned char) c); } +static inline int isalpha_c(char c) { return isalpha((unsigned char) c); } +static inline int isdigit_c(char c) { return isdigit((unsigned char) c); } +static inline int isprint_c(char c) { return isprint((unsigned char) c); } +static inline int ispunct_c(char c) { return ispunct((unsigned char) c); } +static inline int isspace_c(char c) { return isspace((unsigned char) c); } +static inline char tolower_c(char c) { return tolower((unsigned char) c); } +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + #endif diff --git a/consensus.c b/consensus.c index 166a4de16..c95753c27 100644 --- a/consensus.c +++ b/consensus.c @@ -339,7 +339,7 @@ static void destroy_data(args_t *args) static void init_region(args_t *args, char *line) { char *ss, *se = line; - while ( *se && !isspace(*se) && *se!=':' ) se++; + while ( *se && !isspace_c(*se) && *se!=':' ) se++; hts_pos_t from = 0, to = 0; char tmp = 0, *tmp_ptr = NULL; if ( *se ) @@ -353,7 +353,7 @@ static void init_region(args_t *args, char *line) from--; ss = ++se; to = strtol(ss,&se,10); - if ( ss==se || (*se && !isspace(*se)) ) { from = 0; to = 0; } + if ( ss==se || (*se && !isspace_c(*se)) ) { from = 0; to = 0; } else to--; } } @@ -500,9 
+500,9 @@ static void mark_ins(char *ref, char *alt, char mark) { int i, nref = strlen(ref), nalt = strlen(alt); if ( mark==TO_LOWER ) - for (i=nref; iprev_base_pos==rec->pos && toupper(ref_allele[0])==toupper(args->prev_base) ) + if ( args->prev_base_pos==rec->pos && toupper_c(ref_allele[0])==toupper_c(args->prev_base) ) { if ( rec->rlen==1 ) fail = 0; else if ( !strncasecmp(ref_allele+1,args->fa_buf.s+idx+1,rec->rlen-1) ) fail = 0; @@ -992,11 +987,11 @@ static void apply_variant(args_t *args, bcf1_t *rec) } int safe_idx = idx<0 ? 0 : idx; // idx can be negative in case of overlapping deletion - args->fa_case = toupper(args->fa_buf.s[safe_idx])==args->fa_buf.s[safe_idx] ? TO_UPPER : TO_LOWER; + args->fa_case = toupper_c(args->fa_buf.s[safe_idx])==args->fa_buf.s[safe_idx] ? TO_UPPER : TO_LOWER; if ( args->fa_case==TO_UPPER ) - for (i=0; imark_ins && len_diff>0 ) mark_ins(ref_allele, alt_allele, args->mark_ins); @@ -1080,9 +1075,9 @@ static void mask_region(args_t *args, char *seq, int len) if ( idx_start < 0 ) idx_start = 0; if ( idx_end >= len ) idx_end = len - 1; if ( mask->with==MASK_UC ) - for (j=idx_start; j<=idx_end; j++) seq[j] = toupper(seq[j]); + for (j=idx_start; j<=idx_end; j++) seq[j] = toupper_c(seq[j]); else if ( mask->with==MASK_LC ) - for (j=idx_start; j<=idx_end; j++) seq[j] = tolower(seq[j]); + for (j=idx_start; j<=idx_end; j++) seq[j] = tolower_c(seq[j]); else for (j=idx_start; j<=idx_end; j++) seq[j] = mask->with; } @@ -1124,7 +1119,7 @@ static void consensus(args_t *args) args->fa_src_pos += str.l; // determine if uppercase or lowercase is used in this fasta file - if ( args->fa_case==-1 ) args->fa_case = toupper(str.s[0])==str.s[0] ? 1 : 0; + if ( args->fa_case==-1 ) args->fa_case = toupper_c(str.s[0])==str.s[0] ? 
1 : 0; if ( args->mask ) mask_region(args, str.s, str.l); kputs(str.s, &args->fa_buf); diff --git a/convert.c b/convert.c index 43cdf507e..ad5bc4043 100644 --- a/convert.c +++ b/convert.c @@ -1461,7 +1461,7 @@ static int parse_subscript(char **p) char *q = *p; if ( *q!='{' ) return -1; q++; - while ( *q && *q!='}' && isdigit(*q) ) q++; + while ( *q && *q!='}' && isdigit_c(*q) ) q++; if ( *q!='}' ) return -1; int idx = atoi((*p)+1); *p = q+1; @@ -1474,7 +1474,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) if ( is_vcf_column ) p++; char *q = ++p; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; kstring_t str = {0,0,0}; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); @@ -1517,7 +1517,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) } p = ++q; str.l = 0; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); fmt_t *fmt = register_tag(convert, str.s, is_gtf, T_INFO); @@ -1567,7 +1567,7 @@ static char *parse_tag(convert_t *convert, char *p, int is_gtf) { p = ++q; str.l = 0; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || *q=='_' || *q=='.') ) q++; if ( q-p==0 ) error("Could not parse format string: %s\n", convert->format_str); kputsn(p, q-p, &str); fmt_t *fmt = register_tag(convert, str.s, is_gtf, T_INFO); diff --git a/csq.c b/csq.c index 9572fd32b..3007ef3d1 100644 --- a/csq.c +++ b/csq.c @@ -2844,7 +2844,7 @@ static int sanity_check_ref(args_t *args, gf_tscript_t *tr, bcf1_t *rec) int i = 0; while ( ref[i] && vcf[i] ) { - if ( ref[i]!=vcf[i] && toupper(ref[i])!=toupper(vcf[i]) ) + if ( ref[i]!=vcf[i] && toupper_c(ref[i])!=toupper_c(vcf[i]) ) { if ( !args->force ) error("Error: the fasta 
reference does not match the VCF REF allele at %s:%"PRId64" .. fasta=%c vcf=%c\n", diff --git a/filter.c b/filter.c index 75d74b77b..686bace32 100644 --- a/filter.c +++ b/filter.c @@ -183,15 +183,15 @@ inline static void tok_init_samples(token_t *atok, token_t *btok, token_t *rtok) static int filters_next_token(char **str, int *len) { char *tmp = *str; - while ( *tmp && isspace(*tmp) ) tmp++; + while ( *tmp && isspace_c(*tmp) ) tmp++; *str = tmp; *len = 0; // test for doubles: d.ddde[+-]dd - if ( isdigit(*str[0]) || *str[0]=='.' ) // strtod would eat +/- + if ( isdigit_c(*str[0]) || *str[0]=='.' ) // strtod would eat +/- { double HTS_UNUSED v = strtod(*str, &tmp); - if ( *str!=tmp && (!tmp[0] || !isalnum(tmp[0])) ) + if ( *str!=tmp && (!tmp[0] || !isalnum_c(tmp[0])) ) { *len = tmp - (*str); return TOK_VAL; @@ -246,7 +246,7 @@ static int filters_next_token(char **str, int *len) if ( tmp[0]=='@' ) // file name { - while ( *tmp && !isspace(*tmp) && *tmp!='=' && *tmp!='!' ) tmp++; + while ( *tmp && !isspace_c(*tmp) && *tmp!='=' && *tmp!='!' 
) tmp++; *len = tmp - (*str); return TOK_VAL; } @@ -258,7 +258,7 @@ static int filters_next_token(char **str, int *len) { if ( tmp[0]=='"' ) break; if ( tmp[0]=='\'' ) break; - if ( isspace(tmp[0]) ) break; + if ( isspace_c(tmp[0]) ) break; if ( tmp[0]=='<' ) break; if ( tmp[0]=='>' ) break; if ( tmp[0]=='=' ) break; @@ -3202,7 +3202,7 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok) for (i=0; ihash,list[i]) ) khash_str2int_inc(tok->hash,list[i]); @@ -3541,7 +3541,7 @@ static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks) static void str_to_lower(char *str) { - while ( *str ) { *str = tolower(*str); str++; } + while ( *str ) { *str = tolower_c(*str); str++; } } static int perl_exec(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack) { @@ -3606,7 +3606,7 @@ static int perl_exec(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack static void perl_init(filter_t *filter, char **str) { char *beg = *str; - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) return; if ( strncasecmp("perl:", beg, 5) ) return; #if ENABLE_PERL_FILTERS @@ -3840,7 +3840,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error if ( ret == TOK_PERLSUB ) { - while ( *beg && ((isalnum(*beg) && !ispunct(*beg)) || *beg=='_') ) beg++; + while ( *beg && ((isalnum_c(*beg) && !ispunct_c(*beg)) || *beg=='_') ) beg++; if ( *beg!='(' ) error("[%s:%d] Could not parse the expression: %s\n", __FILE__,__LINE__,str); // the subroutine name @@ -3900,7 +3900,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error } else if ( !len ) // all tokes read or an error { - if ( *tmp && !isspace(*tmp) ) error("Could not parse the expression: [%s]\n", str); + if ( *tmp && !isspace_c(*tmp) ) error("Could not parse the expression: [%s]\n", str); break; // all tokens read } else // TOK_VAL: annotation name or value diff --git a/mcall.c 
b/mcall.c index 13383787e..ced4f53de 100644 --- a/mcall.c +++ b/mcall.c @@ -31,6 +31,9 @@ THE SOFTWARE. */ #include "call.h" #include "prob1.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + // Using priors for GTs does not seem to be mathematically justified. Although // it seems effective in removing false calls, it also flips a significant // proportion of HET genotypes. Better is to filter by FORMAT/GQ using @@ -308,10 +311,10 @@ static void init_sample_groups(call_t *call) for (i=0; isample_groups,lines[i]); char *tmp = ptr; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse the line in %s, expected a sample name followed by tab and a population name: %s\n",call->sample_groups,lines[i]); *tmp = 0; int ismpl = bcf_hdr_id2int(call->hdr, BCF_DT_SAMPLE, lines[i]); diff --git a/mpileup.c b/mpileup.c index 00d21dbe5..8afd9edd9 100644 --- a/mpileup.c +++ b/mpileup.c @@ -1074,7 +1074,7 @@ int read_file_list(const char *file_list,int *n,char **argv[]) { // allow empty lines and trailing spaces len = strlen(buf); - while ( len>0 && isspace(buf[len-1]) ) len--; + while ( len>0 && isspace_c(buf[len-1]) ) len--; if ( !len ) continue; // check sanity of the file list @@ -1084,7 +1084,7 @@ int read_file_list(const char *file_list,int *n,char **argv[]) // no such file, check if it is safe to print its name int i, safe_to_print = 1; for (i=0; itmp_str.l = 0; kputsn(ss,se-ss,&ploidy->tmp_str); @@ -102,7 +102,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg } ss = se; - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; if ( !*se ) error("Could not parse: %s\n", line); sp->ploidy = strtol(ss,&se,10); if ( ss==se ) error("Could not parse: %s\n", line); @@ -163,13 +163,13 @@ ploidy_t *ploidy_init_string(const char *str, int dflt) const char *ss = str; while ( *ss ) { - while ( 
*ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; const char *se = ss; while ( *se && *se!='\r' && *se!='\n' ) se++; tmp.l = 0; kputsn(ss, se-ss, &tmp); regidx_insert(pld->idx,tmp.s); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; } free(tmp.s); diff --git a/plugins/fill-tags.c b/plugins/fill-tags.c index 473bc1580..c803e617c 100644 --- a/plugins/fill-tags.c +++ b/plugins/fill-tags.c @@ -171,14 +171,14 @@ void parse_samples(args_t *args, char *fname) // NA12400 GRP1 // NA18507 GRP1,GRP2 char *pop_names = str.s + str.l - 1; - while ( pop_names >= str.s && isspace(*pop_names) ) pop_names--; + while ( pop_names >= str.s && isspace_c(*pop_names) ) pop_names--; if ( pop_names <= str.s ) error("Could not parse the file: %s\n", str.s); pop_names[1] = 0; // trailing spaces - while ( pop_names >= str.s && !isspace(*pop_names) ) pop_names--; + while ( pop_names >= str.s && !isspace_c(*pop_names) ) pop_names--; if ( pop_names <= str.s ) error("Could not parse the file: %s\n", str.s); char *smpl = pop_names++; - while ( smpl >= str.s && isspace(*smpl) ) smpl--; + while ( smpl >= str.s && isspace_c(*smpl) ) smpl--; if ( smpl <= str.s+1 ) error("Could not parse the file: %s\n", str.s); smpl[1] = 0; smpl = str.s; diff --git a/plugins/fixploidy.c b/plugins/fixploidy.c index a6bc7757e..2cb6afbd1 100644 --- a/plugins/fixploidy.c +++ b/plugins/fixploidy.c @@ -87,11 +87,11 @@ void set_samples(char *fname, bcf_hdr_t *hdr, ploidy_t *ploidy, int *sample2sex) while ( hts_getline(fp, KS_SEP_LINE, &tmp) > 0 ) { char *ss = tmp.s; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) error("Could not parse: %s\n", tmp.s); if ( *ss=='#' ) continue; char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; char x = *se; *se = 0; int ismpl = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, ss); @@ -99,10 +99,10 @@ void set_samples(char *fname, bcf_hdr_t *hdr, ploidy_t *ploidy, int 
*sample2sex) *se = x; ss = se+1; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) error("Could not parse: %s\n", tmp.s); se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( se==ss ) error("Could not parse: %s\n", tmp.s); sample2sex[ismpl] = ploidy_add_sex(ploidy, ss); diff --git a/plugins/fixref.c b/plugins/fixref.c index abb3367ea..b38ebbf12 100644 --- a/plugins/fixref.c +++ b/plugins/fixref.c @@ -278,7 +278,7 @@ static bcf1_t *set_ref_alt(args_t *args, bcf1_t *rec, const char ref, const char static inline int nt2int(char nt) { - nt = toupper(nt); + nt = toupper_c(nt); if ( nt=='A' ) return 0; if ( nt=='C' ) return 1; if ( nt=='G' ) return 2; diff --git a/plugins/gvcfz.c b/plugins/gvcfz.c index 91d43f159..4fcc24e21 100644 --- a/plugins/gvcfz.c +++ b/plugins/gvcfz.c @@ -130,7 +130,7 @@ static void init_groups(args_t *args) char *rmme_str = strdup(args->group_by), *beg = rmme_str; while ( *beg ) { - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !beg ) break; char *end = beg; while ( *end && *end!=':' ) end++; @@ -155,8 +155,8 @@ static void init_groups(args_t *args) if ( !strcmp(flt,"PASS") ) grp->flt_id = -1; // remove trailing spaces - beg = grp->expr + strlen(grp->expr); while ( beg >= grp->expr && isspace(*beg) ) { *beg = 0; beg--; } - beg = grp->expr; while ( *beg && isspace(*beg) ) beg++; + beg = grp->expr + strlen(grp->expr); while ( beg >= grp->expr && isspace_c(*beg) ) { *beg = 0; beg--; } + beg = grp->expr; while ( *beg && isspace_c(*beg) ) beg++; grp->flt = strcmp("-",beg) ? 
filter_init(args->hdr_in, grp->expr) : NULL; diff --git a/plugins/mendelian2.c b/plugins/mendelian2.c index 837b97da2..0347cd0e2 100644 --- a/plugins/mendelian2.c +++ b/plugins/mendelian2.c @@ -235,12 +235,12 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ // eat any leading spaces char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip empty lines // sex id, e.g. 1X or 2X char keep, *tmp, *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the sex ID in the region line: %s\n", line); keep = *se; *se = 0; @@ -252,13 +252,13 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ sex_id = args->nsex_id++; } *se = keep; - while ( *se && isdigit(*se) ) se++; - while ( *se && isspace(*se) ) se++; + while ( *se && isdigit_c(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; // chromosome name, beg, end - while ( se[1] && !isspace(se[1]) ) se++; - while ( se > ss && isdigit(*se) ) se--; + while ( se[1] && !isspace_c(se[1]) ) se++; + while ( se > ss && isdigit_c(*se) ) se--; if ( *se!='-' ) error("Could not parse the region: %s\n",line); *end = strtol(se+1, &tmp, 10) - 1; if ( tmp==se+1 ) error("Could not parse the region: %s\n",line); @@ -270,8 +270,8 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ *chr_end = se-1; // skip region - while ( *ss && !isspace(*ss) ) ss++; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && !isspace_c(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; rule_t *rule = (rule_t*) payload; rule->sex_id = sex_id; @@ -279,7 +279,7 @@ static int parse_rules(const char *line, char **chr_beg, char **chr_end, uint32_ rule->ploidy = 0; // alleles inherited from mother (M), father (F), both (MF), none (.) 
- while ( *ss && !isspace(*ss) ) + while ( *ss && !isspace_c(*ss) ) { if ( *ss=='M' ) { rule->inherits |= 1<ploidy++; } else if ( *ss=='F' ) { rule->inherits |= 1<ploidy++; } diff --git a/plugins/scatter.c b/plugins/scatter.c index ec68e56c9..2afd0e04c 100644 --- a/plugins/scatter.c +++ b/plugins/scatter.c @@ -117,12 +117,12 @@ void mkdir_p(const char *fmt, ...) HTS_FORMAT(HTS_PRINTF_FMT, 1, 2); int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && *se!=':' && !isspace(*se) ) se++; + while ( *se && *se!=':' && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -140,9 +140,9 @@ int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint if ( *beg==0 ) { fprintf(stderr,"Could not parse reg line, expected 1-based coordinate: %s\n", line); return -2; } (*beg)--; - if ( !se[0] || isspace(se[0])) { + if ( !se[0] || isspace_c(se[0])) { *end = *beg; - } else if ( se[0] == '-' && (!se[1] || isspace(se[1])) ) { + } else if ( se[0] == '-' && (!se[1] || isspace_c(se[1])) ) { *end = MAX_COOR_0; se++; } else { @@ -155,7 +155,7 @@ int regidx_parse_reg_name(const char *line, char **chr_beg, char **chr_end, uint } ss = se; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !ss[0] ) ss = (char *)line; int *idx = (int *)payload; @@ -180,7 +180,7 @@ static void open_set(subset_t *set, args_t *args) int k, l = args->str.l; if (args->prefix) kputs(args->prefix, &args->str); kputs(set->fname, &args->str); - for (k=l; kstr.l; k++) if ( isspace(args->str.s[k]) ) args->str.s[k] = '_'; + for (k=l; kstr.l; k++) if ( isspace_c(args->str.s[k]) ) args->str.s[k] = '_'; if ( args->output_type & FT_BCF ) kputs(".bcf", &args->str); else if ( 
args->output_type & FT_GZ ) kputs(".vcf.gz", &args->str); else kputs(".vcf", &args->str); diff --git a/plugins/setGT.c b/plugins/setGT.c index 6495006c3..7340f7f74 100644 --- a/plugins/setGT.c +++ b/plugins/setGT.c @@ -162,12 +162,12 @@ void parse_binom_expr(args_t *args, char *str) if ( str[1]!=':' ) _parse_binom_expr_error(str); char *beg = str+2; - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) _parse_binom_expr_error(str); char *end = beg; while ( *end ) { - if ( isspace(*end) || *end=='<' || *end=='=' || *end=='>' ) break; + if ( isspace_c(*end) || *end=='<' || *end=='=' || *end=='>' ) break; end++; } if ( !*end ) _parse_binom_expr_error(str); @@ -176,7 +176,7 @@ void parse_binom_expr(args_t *args, char *str) int tag_id = bcf_hdr_id2int(args->in_hdr,BCF_DT_ID,args->binom_tag); if ( !bcf_hdr_idinfo_exists(args->in_hdr,BCF_HL_FMT,tag_id) ) error("The FORMAT tag \"%s\" is not present in the VCF\n", args->binom_tag); - while ( *end && isspace(*end) ) end++; + while ( *end && isspace_c(*end) ) end++; if ( !*end ) _parse_binom_expr_error(str); if ( !strncmp(end,"<=",2) ) { args->binom_cmp = cmp_le; beg = end+2; } @@ -187,11 +187,11 @@ void parse_binom_expr(args_t *args, char *str) else if ( !strncmp(end,"=",1) ) { args->binom_cmp = cmp_eq; beg = end+1; } else _parse_binom_expr_error(str); - while ( *beg && isspace(*beg) ) beg++; + while ( *beg && isspace_c(*beg) ) beg++; if ( !*beg ) _parse_binom_expr_error(str); args->binom_val = strtod(beg, &end); - while ( *end && isspace(*end) ) end++; + while ( *end && isspace_c(*end) ) end++; if ( *end ) _parse_binom_expr_error(str); args->tgt_mask |= GT_BINOM; diff --git a/plugins/split-vep.c b/plugins/split-vep.c index 1a34bef4e..d0ac6433d 100644 --- a/plugins/split-vep.c +++ b/plugins/split-vep.c @@ -341,7 +341,7 @@ static void expand_csq_expression(args_t *args, kstring_t *str) char *ptr = strstr(args->format_str,str->s); if ( !ptr ) return; char *end = ptr + str->l, tmp = 
*end; - if ( isalnum(tmp) || tmp=='_' || tmp=='.' ) return; + if ( isalnum_c(tmp) || tmp=='_' || tmp=='.' ) return; *end = 0; str->l = 0; @@ -396,11 +396,11 @@ static void init_column2type(args_t *args) tmp.l = 0; kputc('^',&tmp); char *ptr = type[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Error: failed to parse the column type \"%s\"\n",type[i]); kputsn(type[i],ptr-type[i],&tmp); kputc('$',&tmp); - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Error: failed to parse the column type \"%s\"\n",type[i]); args->ncolumn2type++; args->column2type = (col2type_t*) realloc(args->column2type,sizeof(*args->column2type)*args->ncolumn2type); @@ -463,7 +463,7 @@ static int query_has_field(char *fmt, char *field, kstring_t *str) ptr = strstr(ptr,str->s); if ( !ptr ) return 0; end = ptr[str->l]; - if ( isalnum(end) || end=='_' || end=='.' ) + if ( isalnum_c(end) || end=='_' || end=='.' 
) { ptr++; continue; @@ -948,13 +948,13 @@ static void init_data(args_t *args) { if ( *ep=='#' ) { - while ( *ep && *ep!='\n' ) { *ep = tolower(*ep); ep++; } + while ( *ep && *ep!='\n' ) { *ep = tolower_c(*ep); ep++; } if ( !*ep ) break; ep++; continue; } char *bp = ep; - while ( *ep && !isspace(*ep) ) { *ep = tolower(*ep); ep++; } + while ( *ep && !isspace_c(*ep) ) { *ep = tolower_c(*ep); ep++; } char tmp = *ep; *ep = 0; args->nscale++; @@ -965,7 +965,7 @@ static void init_data(args_t *args) if ( !tmp ) break; if ( tmp=='\n' ) severity++; ep++; - while ( *ep && isspace(*ep) ) ep++; + while ( *ep && isspace_c(*ep) ) ep++; } // Transcript and consequence selection @@ -1098,7 +1098,7 @@ static void csq_to_severity(args_t *args, char *csq, int *min_severity, int *max while ( *ep ) { char *bp = ep; - while ( *ep && *ep!='&' ) { *ep = tolower(*ep); ep++; } + while ( *ep && *ep!='&' ) { *ep = tolower_c(*ep); ep++; } char tmp = *ep; *ep = 0; diff --git a/plugins/split.c b/plugins/split.c index 93d04a971..bbab6a666 100644 --- a/plugins/split.c +++ b/plugins/split.c @@ -203,7 +203,7 @@ void init_subsets(args_t *args) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; if ( *ptr==',' ) set->nsmpl++; // todo: allow commas in sample names kputc(*ptr, &str); escaped = 0; @@ -228,16 +228,16 @@ void init_subsets(args_t *args) } if ( !j ) continue; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; j = 0; if ( *ptr ) // optional second column with new sample names { set->rename = (char**) calloc(set->nsmpl, sizeof(*set->rename)); beg = ptr; - while ( *beg && !isspace(*beg) ) + while ( *beg && !isspace_c(*beg) ) { ptr = beg; - while ( *ptr && *ptr!=',' && !isspace(*ptr) ) ptr++; + while ( *ptr && *ptr!=',' && !isspace_c(*ptr) ) ptr++; char tmp = *ptr; *ptr = 0; if ( !strcmp("-",beg) ) @@ -248,7 +248,7 @@ void init_subsets(args_t *args) 
} set->rename[j++] = strdup(beg); *ptr = tmp; - if ( !tmp || isspace(tmp) ) break; + if ( !tmp || isspace_c(tmp) ) break; beg = ptr + 1; if ( j >= set->nsmpl ) error("Expected the same number of samples in the first and second column: %s\n",files[i]); @@ -262,7 +262,7 @@ void init_subsets(args_t *args) } } - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( *ptr ) // optional third column with file name { free(set->fname); @@ -293,7 +293,7 @@ void init_subsets(args_t *args) while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; escaped = 0; ptr++; } @@ -311,18 +311,18 @@ void init_subsets(args_t *args) if ( tmp ) // two columns: new sample name { rename = ptr + 1; - while ( *rename && isspace(*rename) ) rename++; + while ( *rename && isspace_c(*rename) ) rename++; if ( !*rename ) rename = NULL; // trailing space else { ptr = rename; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; tmp = *ptr; *ptr = 0; if ( !strcmp("-",rename) ) rename = NULL; if ( tmp ) ptr++; } - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; } if ( !*ptr ) // no third column, use sample name as file name diff --git a/plugins/vrfs.c b/plugins/vrfs.c index e851003fa..eaddbdaaa 100644 --- a/plugins/vrfs.c +++ b/plugins/vrfs.c @@ -161,12 +161,12 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ // CHR part char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -180,9 +180,9 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ (*beg)--; // REF part and REF 
length - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int ref_len = se - ss; if ( !ref_len ) error("Could not parse the REF part of the line: %s\n",line); *end = *beg; // we are interested in overlaps at the POS only, not variant length @@ -195,9 +195,9 @@ static int parse_sites(const char *line, char **chr_beg, char **chr_end, uint32_ site->dist = calloc(args->profile.nbins,sizeof(*site->dist)); // ALT part - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int alt_len = se - ss; if ( !alt_len ) error("Could not parse the ALT part of the line: %s\n",line); site->alt = malloc(alt_len+1); @@ -738,7 +738,7 @@ static double *parse_float_array(const char *line, int *narray) const char *ptr = line; while ( *ptr ) { - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; n++; if ( *ptr ) ptr++; } @@ -748,7 +748,7 @@ static double *parse_float_array(const char *line, int *narray) { char *tmp; array[i] = strtod(ptr,&tmp); - if ( *tmp && !isspace(*tmp) ) error("Could not parse the float array: %s\n",line); + if ( *tmp && !isspace_c(*tmp) ) error("Could not parse the float array: %s\n",line); ptr = tmp+1; } *narray = n; @@ -771,11 +771,11 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // CHR part char *ss = (char*) line + 5; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -2; // unexpected format char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -790,7 +790,7 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // REF part ss = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int 
ref_len = se - ss; *end = *beg; @@ -802,18 +802,18 @@ static int parse_batch(const char *line, char **chr_beg, char **chr_end, uint32_ // ALT part ss = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; int alt_len = se - ss; site->alt = malloc(alt_len+1); strncpy(site->alt,ss,alt_len); site->alt[alt_len] = 0; // skip the SCORE part - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the SCORE part of the line: %s\n",line); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; if ( !*se ) error("Could not parse the SCORE part of the line: %s\n",line); // read the PROFILE part diff --git a/regidx.c b/regidx.c index 445d7d585..40d909896 100644 --- a/regidx.c +++ b/regidx.c @@ -29,6 +29,9 @@ #include #include "regidx.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + #define MAX_COOR_0 REGIDX_MAX // CSI and hts_itr_query limit, 0-based #define iBIN(x) ((x)>>13) @@ -211,13 +214,13 @@ regidx_t *regidx_init_string(const char *str, regidx_parse_f parser, regidx_free const char *ss = str; while ( *ss ) { - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; const char *se = ss; while ( *se && *se!='\r' && *se!='\n' ) se++; tmp.l = 0; kputsn(ss, se-ss, &tmp); regidx_insert(idx,tmp.s); - while ( *se && isspace(*se) ) se++; + while ( *se && isspace_c(*se) ) se++; ss = se; } free(tmp.s); @@ -476,12 +479,12 @@ int regidx_overlap(regidx_t *regidx, const char *chr, uint32_t beg, uint32_t end int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) 
return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -508,12 +511,12 @@ int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, uint32_t int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -538,7 +541,7 @@ int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, uint32_t { ss = se+1; *end = strtod(ss, &se); - if ( ss==se || (*se && !isspace(*se)) ) *end = *beg; + if ( ss==se || (*se && !isspace_c(*se)) ) *end = *beg; else if ( *end==0 ) { fprintf(stderr,"Could not parse tab line, expected 1-based coordinate: %s\n", line); return -2; } else (*end)--; } @@ -555,7 +558,7 @@ int regidx_parse_vcf(const char *line, char **chr_beg, char **chr_end, uint32_t int regidx_parse_reg(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) return -1; // skip blank lines if ( *ss=='#' ) return -1; // skip comments diff --git a/reheader.c b/reheader.c index df631aae3..bb94e7a22 100644 --- a/reheader.c +++ b/reheader.c @@ -78,10 +78,10 @@ static char *copy_and_update_contig_line(faidx_t *fai, char *line, void *chr_see p = ++q; while ( *q && (*q==' ' || *q=='\t') ) { p++; q++; } // ^[A-Za-z_][0-9A-Za-z_.]*$ - if (p==q && *q && (isalpha(*q) || *q=='_')) + if (p==q && *q && (isalpha_c(*q) || *q=='_')) { q++; - while ( *q && (isalnum(*q) || *q=='_' || *q=='.') ) q++; + while ( *q && (isalnum_c(*q) || 
*q=='_' || *q=='.') ) q++; } int n = q-p; int m = 0; @@ -228,7 +228,7 @@ static void read_header_file(char *fname, kstring_t *hdr) if ( hts_close(fp) ) error("Close failed: %s\n", fname); free(tmp.s); - while ( hdr->l>0 && isspace(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines + while ( hdr->l>0 && isspace_c(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines kputc('\n',hdr); } @@ -248,17 +248,17 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &key); escaped = 0; ptr++; } if ( !*ptr ) break; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } - if ( isspace(*ptr) && !escaped ) break; + if ( isspace_c(*ptr) && !escaped ) break; kputc(*ptr, &val); escaped = 0; ptr++; @@ -273,7 +273,7 @@ static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int id return 0; } - while ( hdr->l>0 && isspace(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines + while ( hdr->l>0 && isspace_c(hdr->s[hdr->l-1]) ) hdr->l--; // remove trailing newlines hdr->s[hdr->l] = 0; kstring_t tmp = {0,0,0}; diff --git a/smpl_ilist.c b/smpl_ilist.c index 4bc4cec2e..7a0d5e275 100644 --- a/smpl_ilist.c +++ b/smpl_ilist.c @@ -40,7 +40,7 @@ void smpl_ilist_destroy(smpl_ilist_t *smpl) static inline int is_space_or_escaped(const char *min, const char *str) { - if ( !isspace(*str) ) return 0; + if ( !isspace_c(*str) ) return 0; int n = 0; while ( --str>=min && *str=='\\' ) n++; return n%2 ? 
0 : 1; diff --git a/str_finder.c b/str_finder.c index a9281d811..cf9e4bdf9 100644 --- a/str_finder.c +++ b/str_finder.c @@ -94,7 +94,7 @@ static void add_rep(rep_ele **list, char *cons, int clen, int pos, int rlen, if (lower_only) { int lc = 0; for (i = el->start; i <= el->end; i++) { - if (islower(cons[i])) { + if (islower((unsigned char) cons[i])) { lc = 1; break; } diff --git a/test/test-regidx.c b/test/test-regidx.c index eed36ab74..8ec261d61 100644 --- a/test/test-regidx.c +++ b/test/test-regidx.c @@ -37,6 +37,9 @@ #include #include "regidx.h" +// Avoid having to include all of bcftools.h +static inline int isspace_c(char c) { return isspace((unsigned char) c); } + static int verbose = 0; void HTS_FORMAT(HTS_PRINTF_FMT, 1, 2) @@ -75,18 +78,18 @@ int custom_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg // Skip the fields that were parsed above char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; for (i=0; i<3; i++) { - while ( *ss && !isspace(*ss) ) ss++; + while ( *ss && !isspace_c(*ss) ) ss++; if ( !*ss ) return -2; // wrong number of fields - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; } if ( !*ss ) return -2; // Parse the payload char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; char **dat = (char**) payload; *dat = (char*) malloc(se-ss+1); memcpy(*dat,ss,se-ss+1); diff --git a/tsv2vcf.c b/tsv2vcf.c index 22dec3065..89b972088 100644 --- a/tsv2vcf.c +++ b/tsv2vcf.c @@ -79,14 +79,14 @@ int tsv_parse(tsv_t *tsv, bcf1_t *rec, char *str) tsv->ss = tsv->se = str; while ( *tsv->ss && tsv->icol < tsv->ncols ) { - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; if ( tsv->cols[tsv->icol].setter ) { int ret = tsv->cols[tsv->icol].setter(tsv,rec,tsv->cols[tsv->icol].usr); if ( ret<0 ) return -1; status++; } - while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + 
while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; tsv->icol++; } @@ -123,7 +123,7 @@ int tsv_setter_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) { bcf_hdr_t *hdr = (bcf_hdr_t*)usr; char *sb = tsv->ss; - while ( *sb && !isspace(*sb) ) sb++; + while ( *sb && !isspace_c(*sb) ) sb++; if ( !*sb ) return -1; char tmp = *sb; *sb = ','; diff --git a/tsv2vcf.h b/tsv2vcf.h index 68757d459..53964adcf 100644 --- a/tsv2vcf.h +++ b/tsv2vcf.h @@ -27,6 +27,7 @@ #define __TSV2VCF_H__ #include +#include "bcftools.h" typedef struct _tsv_t tsv_t; typedef int (*tsv_setter_t)(tsv_t *, bcf1_t *, void *); @@ -65,12 +66,12 @@ static inline int tsv_next(tsv_t *tsv) if ( !*tsv->se ) return -1; if ( tsv->ss==tsv->se ) { - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; return 0; } - while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; return 0; } diff --git a/vcfannotate.c b/vcfannotate.c index 7e19fe0f1..47e70d82f 100644 --- a/vcfannotate.c +++ b/vcfannotate.c @@ -2229,12 +2229,12 @@ static void init_columns(args_t *args) for (i=0; imerge_method_str.l ) kputc(',',&args->merge_method_str); @@ -2834,7 +2834,7 @@ static void rename_chrs(args_t *args, char *fname) for (i=0; ihdr_out, map[i]); @@ -2844,9 +2844,9 @@ static void rename_chrs(args_t *args, char *fname) assert( j>=0 ); free(hrec->vals[j]); ss++; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *se = 0; hrec->vals[j] = strdup(ss); args->hdr_out->id[BCF_DT_CTG][rid].key = hrec->vals[j]; @@ -2891,7 +2891,7 @@ static int rename_annots_core(args_t *args, char *ori_tag, char *new_tag) assert( j>=0 ); free(hrec->vals[j]); char *ptr = new_tag; - while ( *ptr && 
!isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; *ptr = 0; hrec->vals[j] = strdup(new_tag); args->hdr_out->id[BCF_DT_ID][id].key = hrec->vals[j]; @@ -2908,12 +2908,12 @@ static void rename_annots(args_t *args) for (i=0; irename_annots_nmap; i++) { char *ptr = args->rename_annots_map[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse: %s\n", args->rename_annots_map[i]); char *rmme = ptr; *ptr = 0; ptr++; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; if ( !*ptr ) { *rmme = ' '; error("Could not parse: %s\n", args->rename_annots_map[i]); } if ( rename_annots_core(args, args->rename_annots_map[i], ptr) < 0 ) error("Cannot rename \"%s\" to \"%s\"\n",args->rename_annots_map[i],ptr); diff --git a/vcfcall.c b/vcfcall.c index 7ea143666..063332b40 100644 --- a/vcfcall.c +++ b/vcfcall.c @@ -218,11 +218,11 @@ static char **parse_ped_samples(args_t *args, call_t *call, char **vals, int nva j = 0; while ( *tmp && j<5 ) { - if ( isspace(*tmp) ) + if ( isspace_c(*tmp) ) { *tmp = 0; ++tmp; - while ( isspace(*tmp) ) tmp++; // allow multiple spaces + while ( isspace_c(*tmp) ) tmp++; // allow multiple spaces col_ends[j] = tmp-1; j++; continue; @@ -312,11 +312,11 @@ static void set_samples(args_t *args, const char *fn, int is_file) for (i=0; iaux.hdr, BCF_DT_SAMPLE, ss); @@ -324,10 +324,10 @@ static void set_samples(args_t *args, const char *fn, int is_file) if ( old2new[ismpl] != -1 ) { fprintf(stderr,"Warning: The sample is listed multiple times: %s\n",ss); continue; } ss = se+(x != '\0'); - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) ss = "2"; // default ploidy se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( se==ss ) { *xptr = x; error("Could not parse: \"%s\"\n", lines[i]); } char *sex = ss; @@ -354,11 +354,11 @@ static void set_samples(args_t *args, const char 
*fn, int is_file) for (i=0; iaux.hdr, BCF_DT_SAMPLE, ss); @@ -416,12 +416,12 @@ static void init_missed_line(args_t *args) static int tgt_parse(const char *line, char **chr_beg, char **chr_end, uint32_t *beg, uint32_t *end, void *payload, void *usr) { char *ss = (char*) line; - while ( *ss && isspace(*ss) ) ss++; + while ( *ss && isspace_c(*ss) ) ss++; if ( !*ss ) { fprintf(stderr,"Could not parse the line: %s\n", line); return -2; } if ( *ss=='#' ) return -1; // skip comments char *se = ss; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; *chr_beg = ss; *chr_end = se-1; @@ -445,14 +445,14 @@ static int tgt_parse(const char *line, char **chr_beg, char **chr_end, uint32_t while ( *ss ) { se = ss; - while ( *se && *se!=',' && !isspace(*se) ) se++; + while ( *se && *se!=',' && !isspace_c(*se) ) se++; als->n++; als->allele = (char**)realloc(als->allele,als->n*sizeof(*als->allele)); als->allele[als->n-1] = (char*)malloc(se-ss+1); memcpy(als->allele[als->n-1],ss,se-ss); als->allele[als->n-1][se-ss] = 0; ss = se+1; - if ( !*se || isspace(*se) ) break; + if ( !*se || isspace_c(*se) ) break; } if ( als->n<2 ) error("Unable to parse the -T file; expected CHROM\\tPOS\\tREF,ALT with -C alleles but found instead:\n\t%s\n",line); return 0; diff --git a/vcfconvert.c b/vcfconvert.c index b01742ae6..db09166a5 100644 --- a/vcfconvert.c +++ b/vcfconvert.c @@ -201,8 +201,8 @@ static int _set_chrom_pos_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) if ( *se!='_' ) return -1; kputsn(ss,se-ss,&args->str); ss = ++se; - while ( se < tsv->se && *se!='_' && isspace(*tsv->se) ) se++; - if ( se < tsv->se && *se!='_' && isspace(*tsv->se) ) return -1; + while ( se < tsv->se && *se!='_' && isspace_c(*tsv->se) ) se++; + if ( se < tsv->se && *se!='_' && isspace_c(*tsv->se) ) return -1; kputc(',',&args->str); kputsn(ss,se-ss,&args->str); @@ -275,9 +275,9 @@ static int tsv_setter_verify_ref_alt(tsv_t *tsv, bcf1_t *rec, void *usr) args->rev_als = 1; } *tsv->se = tmp; - 
while ( *tsv->se && isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && isspace_c(*tsv->se) ) tsv->se++; tsv->ss = tsv->se; - while ( *tsv->se && !isspace(*tsv->se) ) tsv->se++; + while ( *tsv->se && !isspace_c(*tsv->se) ) tsv->se++; tmp = *tsv->se; *tsv->se = 0; if ( !args->rev_als && strcmp(tsv->ss,rec->d.allele[1]) ) { *tsv->se = tmp; error("REF/ALT mismatch: [%s][%s]\n", tsv->ss,rec->d.allele[1]); } else if ( args->rev_als && strcmp(tsv->ss,rec->d.allele[0]) ) { *tsv->se = tmp; error("REF/ALT mismatch: [%s][%s]\n", tsv->ss,rec->d.allele[0]); } @@ -436,7 +436,7 @@ static void gensample_to_vcf(args_t *args) // of the columns (CHROM:POS_REF_ALT comes first or second) args->str.l = 0; char *sb = line.s, *se = line.s; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", gen_fname,line.s); if ( args->gen_3N6 ) // first column, just CHROM kputsn(sb, se-sb, &args->str); @@ -445,7 +445,7 @@ static void gensample_to_vcf(args_t *args) char *sc = strchr(sb,':'); if ( !sc || sc > se ) { - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", gen_fname,line.s); sb = ++se; sc = strchr(sb,':'); @@ -482,7 +482,7 @@ static void gensample_to_vcf(args_t *args) if ( !samples ) error("Could not read %s\n", sample_fname); for (i=2; iheader,samples[i]); } @@ -627,7 +627,7 @@ static void haplegendsample_to_vcf(args_t *args) // returned from hts_readlist (i=1, and not i=0) for (i=1; iheader,samples[i]); } @@ -736,13 +736,13 @@ static void hapsample_to_vcf(args_t *args) // Find out the chromosome name, it can be either in the first or second column args->str.l = 0; char *sb = line.s, *se = line.s; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", hap_fname,line.s); if ( !args->output_vcf_ids ) { // first column should be just CHROM, 
but the second must be CHROM:POS_REF_ALT, use that sb = ++se; - while ( *se && !isspace(*se) ) se++; + while ( *se && !isspace_c(*se) ) se++; if ( !*se ) error("Could not determine CHROM in %s: %s\n", hap_fname,line.s); if ( !strchr(sb,':') ) error("Could not determine CHROM in the second column of %s: %s\n", hap_fname,line.s); @@ -781,7 +781,7 @@ static void hapsample_to_vcf(args_t *args) if ( !samples ) error("Could not read %s\n", sample_fname); for (i=2; iheader,samples[i]); } @@ -847,13 +847,13 @@ char *init_sample2sex(bcf_hdr_t *hdr, char *sex_fname) if ( !lines ) error("Could not read %s\n", sex_fname); for (i=0; iheader,rec->rid),(int64_t) rec->pos+1); int nals = 1, alleles[5] = { -1, -1, -1, -1, -1 }; // a,c,g,t,n - ref[0] = toupper(ref[0]); + ref[0] = toupper_c(ref[0]); int iref = acgt_to_5(ref[0]); alleles[iref] = 0; diff --git a/vcfgtcheck.c b/vcfgtcheck.c index d4999f1d4..1ce426ba0 100644 --- a/vcfgtcheck.c +++ b/vcfgtcheck.c @@ -359,13 +359,13 @@ static void init_data(args_t *args) for (i=0; inpairs; i++) { char *ptr = tmp[i]; - while ( *ptr && !isspace(*ptr) ) ptr++; + while ( *ptr && !isspace_c(*ptr) ) ptr++; if ( !*ptr ) error("Could not parse %s: %s\n",args->pair_samples,tmp[i]); *ptr = 0; args->pairs[i].iqry = bcf_hdr_id2int(args->qry_hdr, BCF_DT_SAMPLE, tmp[i]); if ( args->pairs[i].iqry < 0 ) error("No such sample in %s: [%s]\n",args->qry_fname,tmp[i]); ptr++; - while ( *ptr && isspace(*ptr) ) ptr++; + while ( *ptr && isspace_c(*ptr) ) ptr++; args->pairs[i].igt = bcf_hdr_id2int(args->gt_hdr?args->gt_hdr:args->qry_hdr, BCF_DT_SAMPLE, ptr); if ( args->pairs[i].igt < 0 ) error("No such sample in %s: [%s]\n",args->gt_fname?args->gt_fname:args->qry_fname,ptr); free(tmp[i]); diff --git a/vcfisec.c b/vcfisec.c index 51750c170..2b2691a29 100644 --- a/vcfisec.c +++ b/vcfisec.c @@ -630,7 +630,7 @@ int main_vcfisec(int argc, char *argv[]) else if ( *p=='+' ) { args->isec_op = OP_PLUS; p++; } else if ( *p=='=' ) { args->isec_op = OP_EQUAL; p++; } else if ( 
*p=='~' ) { args->isec_op = OP_EXACT; p++; } - else if ( isdigit(*p) ) args->isec_op = OP_EQUAL; + else if ( isdigit_c(*p) ) args->isec_op = OP_EQUAL; else error("Could not parse --nfiles %s\n", optarg); if ( args->isec_op == OP_EXACT ) args->isec_exact = p; else if ( sscanf(p,"%d",&args->isec_n)!=1 ) error("Could not parse --nfiles %s\n", optarg); diff --git a/vcfmerge.c b/vcfmerge.c index ce6a71c9c..9f1a8e9ea 100644 --- a/vcfmerge.c +++ b/vcfmerge.c @@ -793,12 +793,12 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb) for (i=0; in_allele; i++) { - if ( toupper(als[0].s[ als[0].l-1 ]) != toupper(als[i].s[ als[i].l-1 ]) ) break; + if ( toupper_c(als[0].s[ als[0].l-1 ]) != toupper_c(als[i].s[ als[i].l-1 ]) ) break; if ( als[i].l < min_len ) min_len = als[i].l; } if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed @@ -490,7 +490,7 @@ static hts_pos_t realign_left(args_t *args, bcf1_t *line) int min_len = als[0].l - ntrim_left; for (i=1; in_allele; i++) { - if ( toupper(als[0].s[ntrim_left]) != toupper(als[i].s[ntrim_left]) ) break; + if ( toupper_c(als[0].s[ntrim_left]) != toupper_c(als[i].s[ntrim_left]) ) break; if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left; } if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed @@ -524,7 +524,7 @@ static hts_pos_t realign_right(args_t *args, bcf1_t *line) for (i=1; in_allele; i++) { if ( als[0].l!=als[i].l ) has_indel = 1; - if ( toupper(als[0].s[ntrim_left]) != toupper(als[i].s[ntrim_left]) ) break; + if ( toupper_c(als[0].s[ntrim_left]) != toupper_c(als[i].s[ntrim_left]) ) break; if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left; } if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed further @@ -559,7 +559,7 @@ static hts_pos_t realign_right(args_t *args, bcf1_t *line) int min_len = als[0].l; for (i=1; in_allele; i++) { - if ( toupper(als[0].s[ als[0].l-1 ]) != 
toupper(als[i].s[ als[i].l-1 ]) ) break; + if ( toupper_c(als[0].s[ als[0].l-1 ]) != toupper_c(als[i].s[ als[i].l-1 ]) ) break; if ( min_len > als[i].l ) min_len = als[i].l; } if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed more diff --git a/vcfroh.c b/vcfroh.c index 1b3eff91f..d4da445c1 100644 --- a/vcfroh.c +++ b/vcfroh.c @@ -408,7 +408,7 @@ static int load_genmap(args_t *args, const char *chr) // skip second column tmp++; - while ( *tmp && !isspace(*tmp) ) tmp++; + while ( *tmp && !isspace_c(*tmp) ) tmp++; // read the genetic map in cM, scale from % to likelihood gm->rate = strtod(tmp+1, &end); @@ -677,9 +677,9 @@ int read_AF(bcf_sr_regions_t *tgt, bcf1_t *line, double *alt_freq) str++; } *alt_freq = strtod(str, &tmp); - if ( *tmp && !isspace(*tmp) ) + if ( *tmp && !isspace_c(*tmp) ) { - if ( str[0]=='.' && (!str[1] || isspace(str[1])) ) return -1; // missing value + if ( str[0]=='.' && (!str[1] || isspace_c(str[1])) ) return -1; // missing value error("Could not parse: [%s]\n", tgt->line.s); } if ( *alt_freq<0 || *alt_freq>1 ) error("Could not parse AF: [%s]\n", tgt->line.s); diff --git a/vcmp.c b/vcmp.c index dbdc4b7ac..c5f9ec84a 100644 --- a/vcmp.c +++ b/vcmp.c @@ -30,6 +30,9 @@ THE SOFTWARE. 
*/ #include #include "vcmp.h" +// Avoid having to include all of bcftools.h +static inline char toupper_c(char c) { return toupper((unsigned char) c); } + struct _vcmp_t { char *dref; @@ -57,7 +60,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) vcmp->ndref = 0; char *a = ref1, *b = ref2; - while ( *a && *b && toupper(*a)==toupper(*b) ) { a++; b++; } + while ( *a && *b && toupper_c(*a)==toupper_c(*b) ) { a++; b++; } if ( !*a && !*b ) return 0; if ( *a && *b ) return -1; // refs not compatible @@ -68,7 +71,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) while ( *a ) a++; vcmp->ndref = (a-ref1) - vcmp->nmatch; hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); - for (i=0; indref; i++) vcmp->dref[i] = toupper(ref1[vcmp->nmatch+i]); + for (i=0; indref; i++) vcmp->dref[i] = toupper_c(ref1[vcmp->nmatch+i]); vcmp->dref[vcmp->ndref] = 0; return 0; } @@ -78,7 +81,7 @@ int vcmp_set_ref(vcmp_t *vcmp, char *ref1, char *ref2) while ( *b ) b++; vcmp->ndref = (b-ref2) - vcmp->nmatch; hts_expand(char,vcmp->ndref+1,vcmp->mdref,vcmp->dref); - for (i=0; indref; i++) vcmp->dref[i] = toupper(ref2[vcmp->nmatch+i]); + for (i=0; indref; i++) vcmp->dref[i] = toupper_c(ref2[vcmp->nmatch+i]); vcmp->dref[vcmp->ndref] = 0; vcmp->ndref *= -1; return 0; @@ -90,7 +93,7 @@ int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2) for (i=0; indref ) { @@ -103,14 +106,14 @@ int vcmp_find_allele(vcmp_t *vcmp, char **als1, int nals1, char *al2) { if ( vcmp->ndref<0 ) continue; for (j=0; jndref; j++) - if ( !a[j] || toupper(a[j])!=vcmp->dref[j] ) break; + if ( !a[j] || toupper_c(a[j])!=vcmp->dref[j] ) break; if ( j!=vcmp->ndref || a[j] ) continue; break; // found } if ( vcmp->ndref>0 ) continue; for (j=0; j<-vcmp->ndref; j++) - if ( !b[j] || toupper(b[j])!=vcmp->dref[j] ) break; + if ( !b[j] || toupper_c(b[j])!=vcmp->dref[j] ) break; if ( j!=-vcmp->ndref || b[j] ) continue; break; // found }