# include "Seq.h" # define TAB " " # define CH1 "#*" # define CH2 "#-" Statistic NIndVolStats(VolumeLookupTable * V, Sequences * S, int * select) { # define IS(a,b) a ## _ ## b = {0} Statistic IS(Vol,num), IS(Vol,mean), IS(Vol,WtAvg), IS(Vol,dev), Vol_FracDev = {0}, IS(Vol,WtDev), Vol_WtFracDev = {0}, IS(Vol,Var), IS(Vol,WtVar), IS(Vol,max), IS(Vol,min), IS(Natoms,WtAvg), IS(Natoms,WtDev), Natoms_WtFracDev = {0}, IS(Natoms,max), IS(Natoms,min); # define IS(a,b) a ## _ ## b int i; Statistic Vol = {0}, Natoms ={0}; char * FMT = CH1 " At %3s:" TAB "%5s" TAB "%5s" TAB "%5s" TAB "%5s" TAB "%4s" TAB "%5s" TAB "%4s" TAB "%5s" TAB "%5s" TAB "%4s" TAB "%4s" TAB "%4s" TAB "%5s" TAB "%4s" TAB "%4s" TAB "%4s" "\n" ; HELLO("NIndVolStats"); printf(FMT,"Pos","num", "","w", "sd","(%)","wtSD", "(%)", "Var","WtVar", "max","min", "NAtm","sd","(%)","max","min" ); while (*select !=0) { InitStats(&Vol); InitStats(&Natoms); for (i=0; i<= S->nseq; i++) { char s[2] = {0} ; *s = S->seq[i]->aa[*select] ; if (*s == '-') { fprintf(stderr, "SumVolumes: At position %d, which is selected, sequence %s has a gap.\n", *select,S->seq[i]->id); S->seq[i]->flag |= SEQ_BADVOL_FLAG ; } else if (IS_AA(*s)) { int j = IndexOfString(s,&(V->SI)); double wt = S->seq[i]->wt; IncStatsWithWts(&Vol,V->vol[j],wt); IncStatsWithWts(&Natoms,V->natoms[j],wt); } else { STDERR("ERROR! !(*s == '-' || IS_AA(*s))"); } } { char * FMT = CH2" At %3d:" TAB "%5.0f" TAB "%5.0f" TAB "%5.0f" TAB "%5.1f" TAB "%4.1f" TAB "%5.1f" TAB "%4.1f" TAB "%5.0f" TAB "%5.0f" TAB "%4.0f" TAB "%4.0f" TAB "%4.0f" TAB "%5.1f" TAB "%4.1f" TAB "%4.0f" TAB "%4.0f" "\n" ; printf(FMT, *select,Vol.num, Vol.mean,Vol.WtAvg, Vol.dev,100*Vol.FracDev,Vol.WtDev,100*Vol.WtFracDev, Vol.Var,Vol.WtVar, Vol.max,Vol.min, Natoms.WtAvg,Natoms.WtDev,100*Natoms.WtFracDev, Natoms.max,Natoms.min); select++; { # define IS2(a,b) IncrementStats(&IS(a,b),a.b) IS2(Vol,num), IS2(Vol,mean), IS2(Vol,WtAvg), IS2(Vol,dev), IS2(Vol,Var), IS2(Vol,WtVar), IncrementStats(&Vol_FracDev,100*Vol.FracDev), IS2(Vol,WtDev), IncrementStats(&Vol_WtFracDev,100*Vol.WtFracDev), IS2(Vol,max), IS2(Vol,min), IS2(Natoms,WtAvg), IS2(Natoms,WtDev), IncrementStats(&Natoms_WtFracDev,100*Natoms.WtFracDev), IS2(Natoms,max), IS2(Natoms,min); } } } { char * FMT = CH1" MEAN :" TAB "%5.0f" TAB "%5.0f" TAB "%5.0f" TAB "%5.1f" TAB "%4.1f" TAB "%5.1f" TAB "%4.1f" TAB "%5.0f" TAB "%5.0f" TAB "%4.0f" TAB "%4.0f" TAB "%4.0f" TAB "%5.1f" TAB "%4.1f" TAB "%4.0f" TAB "%4.0f" "\n" ; # define IS3(a,b) (IS(a,b).mean) printf(FMT, IS3(Vol,num), IS3(Vol,mean), IS3(Vol,WtAvg), IS3(Vol,dev), IS3(Vol,FracDev), IS3(Vol,WtDev), IS3(Vol,WtFracDev), IS3(Vol,Var), IS3(Vol,WtVar), IS3(Vol,max), IS3(Vol,min), IS3(Natoms,WtAvg), IS3(Natoms,WtDev), IS3(Natoms,WtFracDev), IS3(Natoms,max), IS3(Natoms,min)); } { char * FMT = CH1" SUM :" TAB "%5.0f" TAB "%5.0f" TAB "%5.0f" TAB "%5.1f" TAB "%4.1f" TAB "%5.1f" TAB "%4.1f" TAB "%5.0f" TAB "%5.0f" TAB "%4.0f" TAB "%4.0f" TAB "%4.0f" TAB "%5.1f" TAB "%4.1f" TAB "%4.0f" TAB "%4.0f" "\n" ; # define IS3(a,b) (IS(a,b).sum) # define DUMMY ((double) 0.0) printf(FMT, IS3(Vol,num), IS3(Vol,mean), IS3(Vol,WtAvg), DUMMY, DUMMY, DUMMY, DUMMY, IS3(Vol,Var), IS3(Vol,WtVar), DUMMY, DUMMY, IS3(Natoms,WtAvg), DUMMY, DUMMY, DUMMY, DUMMY); } { double n = Vol_mean.num ; InitStats(&S->IndVol); S->IndVol.sum = n * Vol_mean.sum ; S->IndVol.WtSum = n * Vol_WtAvg.sum ; S->IndVol.SumOfWt = S->IndVol.num = n ; ReComputeDerivedStats(&S->IndVol); S->IndVol.Var = Vol_Var.sum ; S->IndVol.WtVar = Vol_WtVar.sum ; ReComputeDerivedStats2(&S->IndVol); return S->IndVol ; } } void WriteSeq(TokenizeData * d, Sequences * S) { { InitLexer(d); while (TokenizeLine(d)) if(OnBetwStrgs(d,"_BEG_HEAD_VOL_","_END_HEAD_VOL_")) printf("%s\n",d->T[0]); } { char buf[BUFSIZ] ; int i; sprintf(buf,"%%%ds %%s\n",S->SeqStartPos - 1); for (i=0; i<= S->nseq; i++) { printf(buf,S->seq[i]->id,S->seq[i]->aa+1); } } { InitLexer(d); while (TokenizeLine(d)) if (OnBetwStrgs(d,"_BEG_FOOT_VOL_","_END_FOOT_VOL_")) printf("%s\n",d->T[0]); } } void WriteSeqWts(TokenizeData * d, Sequences * S) { { InitLexer(d); while (TokenizeLine(d)) if(OnBetwStrgs(d,"_BEG_HEAD_VOL_","_END_HEAD_VOL_")) printf("%s\n",d->T[0]); } { char buf[BUFSIZ] ; int i; sprintf(buf,"%%%ds %%s %%6.4f\n",S->SeqStartPos - 1); for (i=0; i<= S->nseq; i++) { printf(buf,S->seq[i]->id,S->seq[i]->aa+1,S->seq[i]->wt); } } { InitLexer(d); while (TokenizeLine(d)) if (OnBetwStrgs(d,"_BEG_FOOT_VOL_","_END_FOOT_VOL_")) printf("%s\n",d->T[0]); } } void WriteFASTA(Sequences * S) { { int i; for (i=0; i<= S->nseq; i++) { printf(">%s\n%s\n",S->seq[i]->id, S->seq[i]->aa+1); } } } TokenizeData * NarrowDataBase(TokenizeData * d, char * DataBase,char * protein) { FILE * f = fopen(DataBase,"r"); /* 1 2 */ /* 012345678901234567890 */ char beg[BUFSIZ] = "_BEG_PROTEIN_????_" ; char end[BUFSIZ] = "_END_PROTEIN_????_" ; char tail[BUFSIZ] = {0}; STJOIN(tail,protein,"_"); strcpy(beg+13,tail); strcpy(end+13,tail); if (f==NULL) STDERR("fopen(DataBase,'r') fails"); d->fp = NarrowFileBetwStrgs(f,beg,end); return d; } void WriteResidueVolumes(VolumeLookupTable *V) { int i; PrintIndexedStrings(V->SI); for (i=0; i<21; i++) printf("%c\t%s\t%d\t%f\t%f\n",V->aa[i],V->aa3[i],V->natoms[i],V->vol[i], V->frac[i]); } void ReadResidueVolumes(char * filename, VolumeLookupTable * V) { int i; int k = 0; TokenizeData d = {0}; bzero(V,sizeof(VolumeLookupTable)); InitTokens(&d,filename); while (TokenizeLine(&d)) { if (OnBetwP(StrEq(d.T[1],"_BEG_VOL_"), StrEq(d.T[1],"_END_VOL_"),d.flag)) { i = IndexOfString(d.T[1], &(V->SI)); V->aa[i] = '-' ; strcpy(V->aa3[i],"---"); V->natoms[i] = -1 ; V->vol[i] = -1; if (i>21) STDERR("ERROR: i>21"); # ifdef OFF fprintf(stderr,"< %5d > %2d %1c %5s %2d %9.5f\n", k++,i, V->aa[i],V->aa3[i],V->natoms[i],V->vol[i]); # endif V->aa[i] = d.T[1][0] ; strcpy(V->aa3[i],d.T[2]) ; V->natoms[i] = (int) d.D[3] ; V->vol[i] = d.D[4]; } } } void ReadWeights(char * filename, Sequences * S) { int i = 0; TokenizeData d = {0} ; d.filename = filename ; assert (InitLexer(&d)) ; while (TokenizeLine(&d)) { if (StrEq(d.T[1],"Name:")) { int i = SymbolTableIndexNoAdd(d.T[2],S->SI); if (i == SYMBOL_NOT_IN_TABLE ) { STDERR("ReadWeights: Sequence Symbol not in table. Probably Hb in DB."); exit(1); } S->seq[i]->wt = d.D[9] ; } } } void SameWtsToAll(Sequences * S) { { int i; for (i=0; i<= S->nseq; i++) { S->seq[i]->wt = 1 ; } } } void ReadKeys(TokenizeData * d, Sequences * S) { InitLexer(d); while (TokenizeLine(d)) { if (OnBetwStrgs(d,"_BEG_KEY_","_END_KEY_")) { int i = SymbolTableIndexNoAdd(d->T[3],S->SI); if (i != SYMBOL_NOT_IN_TABLE) { S->seq[i]->chain = d->T[2][0]; S->seq[i]->struc = SaveString(d->T[1]); } } } } void FixSequences(Sequences * S, bool MustAccessTwice) { int i,j; for (i=0; i<= S->nseq; i++) { if (S->seq[i]->access != 2 && MustAccessTwice) { STDERR("ERROR! S->seq[i].access != 2"); ITOE(i); ATOE(S->seq[i]->id); ITOE(S->seq[i]->access); } else { for (j=1; j<= S->SeqSz; j++) { char * c = &(S->seq[i]->aa[j]); if (islower(*c)) *c = toupper(*c); if (!IS_AA(S->seq[i]->aa[j])) S->seq[i]->aa[j] = '-' ; } S->seq[i]->aa[S->SeqSz + 1] = '\0' ; } } } void ReadCHCseq(char * filename, Sequences * S) { int i,j; bool MustAccessTwice = false ; TokenizeData D = {0}, *d = &D; d->filename = filename ; d->debug = 1 ; InitLexer(d); d->debug = 0 ; d->whitespace="\t -:" ; while (TokenizeLine(d)) { if (d->T[0][0] != '#') { if (OnBetwStrgs(d,"_BEG_1_","_END_1_") || OnBetwStrgs2(d,"_BEG_1A_","_END_1A_",1)) { char * si = d->T[1]; char buf[BUFSIZ]; if (InSymbolTableP(si,S->SI)) { fprintf(stderr,"ReadCHCseq: %s --> %s 2*\n",si,si); si = STJOIN(buf,si," 2*") ; } /* Returns 0 as first IndexOfString. */ i = IndexOfString(si, &(S->SI)); if (i>S->nseq) S->nseq = i; if (S->seq[i] == NULL) { S->seq[i] = (Sequence *) calloc(1,sizeof(Sequence)); S->seq[i]->vol = S->seq[i]->natoms = 0; S->seq[i]->access = 1 ; strcpy(S->seq[i]->id,si); strncpy(&(S->seq[i]->aa[1]),&d->T[0][S->SeqStartPos],S->LengthSeg1-1); } else { STDERR("ERROR! S->seq[i] != NULL"); ATOE(si); ATOE(d->T[0]); } } else if (OnBetwStrgs2(d,"_BEG_2_","_END_2_",2)) { MustAccessTwice = true ; i = IndexOfString(d->T[1], &(S->SI)); if (S->seq[i] != NULL && S->seq[i]->access == 1) { int LengthSeg2 = S->SeqSz - S->LengthSeg1 ; S->seq[i]->access = 2 ; strncpy(&(S->seq[i]->aa[S->LengthSeg1]), &d->T[0][S->SeqStartPos],LengthSeg2); } else { STDERR("ERROR! !(S->seq[i] != NULL && S->seq[i]->access == 1)"); ATOE(d->T[1]); ATOE(d->T[0]); ITOE(S->seq[i]->access); ATOE(S->seq[i]->id); } } } } FixSequences(S, MustAccessTwice); } void WrtSelFASTA(Sequences * S,int * sel, char * cr) { { int i,j; for (i=0; i<= S->nseq; i++) { printf(">%-20s%s",S->seq[i]->id,cr); for (j=0; sel[j] ; j++) printf("%c",S->seq[i]->aa[sel[j]]); printf("\n"); } } }