# include "Entropy.h" int Lookup(int i, int *table) { int j ; for (j=1; table[j] != 0; j++) { if (table[j] == i) return j ; } return 0; } Profile * ConstructAAProfile(int Nsites, int * SiteInt, char ** SiteName) { Profile * p = (Profile *) calloc(1,sizeof(Profile)) ; { int t=1; char * a; p->Naa = 23 ; p->aaName = (char **) calloc(p->Naa+1,sizeof(char *)) - 1 ; p->aaInt = (int * ) calloc(p->Naa+1,sizeof(int )) - 1 ; # define ASSIGN_AA(int_,name_) \ p->aaName[t] = MAKESTRING(name_) ;\ a=MAKESTRING(int_); p->aaInt[t] = (int) a[0];\ assert(t<=p->Naa); t++ ; ASSIGN_AA(G,GLY) ; ASSIGN_AA(A,ALA) ; ASSIGN_AA(V,VAL) ; ASSIGN_AA(L,LEU) ; ASSIGN_AA(I,ILE) ; ASSIGN_AA(P,PRO) ; ASSIGN_AA(M,MET) ; ASSIGN_AA(F,PHE) ; ASSIGN_AA(Y,TYR) ; ASSIGN_AA(W,TRP) ; ASSIGN_AA(S,SER) ; ASSIGN_AA(T,THR) ; ASSIGN_AA(N,ASN) ; ASSIGN_AA(Q,GLN) ; ASSIGN_AA(C,CYS) ; ASSIGN_AA(H,HIS) ; ASSIGN_AA(E,GLU) ; ASSIGN_AA(D,ASP) ; ASSIGN_AA(R,ARG) ; ASSIGN_AA(K,LYS) ; ASSIGN_AA(Z,GLX) ; ASSIGN_AA(B,ASX) ; ASSIGN_AA(-,GAP) ; p->aaName[t] = NULL ; p->aaInt[t] = 0 ; assert(t==p->Naa+1) ; # undef ASSIGN_AA } { int r; p->Nsites = Nsites ; p->SiteName = (char **) calloc(Nsites+1, sizeof(char *)) - 1; p->SiteInt = (int * ) calloc(Nsites+1, sizeof(int )) - 1; for (r=1; r<=Nsites; r++) { assert(SiteInt[r] != 0); p->SiteName[r] = SaveString(SiteName[r]); p->SiteInt[r] = SiteInt[r]; } p->SiteInt[Nsites+1] = 0; p->SiteName[Nsites+1] = NULL; } { p->Table = dmatrix(1,p->Nsites,1,p->Naa); p->SumAA = dvector(1,p->Nsites); p->SumSite = dvector(1,p->Naa) ; } return p ; } void PrintProfile(Profile *p) { int r,t; printf("%12s"," "); for (t=1;t<=p->Naa;t++) printf("%3s ",p->aaName[t]); CR; printf("%13s"," "); for (t=1;t<=p->Naa;t++) printf("%1c%2s ", p->aaInt[t]," "); printf("Tot "); CR; for (r=1;r<=p->Nsites;r++) { printf("%5s ",p->SiteName[r]); printf("%4d ",p->SiteInt[r]); for (t=1;t<=p->Naa;t++) printf("%3.0f ",p->Table[r][t]); printf("%3.0f ",p->SumAA[r]); CR; } printf("%11s","Tot"); for (t=1;t<=p->Naa;t++) printf("%3.0f ", p->SumSite[t]," "); CR; } void ComputeProfileWWts(Sequences * S, Profile * p) { int s,r ; for (s=0; s<=S->nseq; s++) { for (r = 1; r <= p->Nsites; r++) { int R = p->SiteInt[ r ] ; int T = (int) S->seq[s]->aa[R]; int t = Lookup(T,p->aaInt); if (t == 0) { STDERR("t==0: You've discovered a new type of Amino Acid."); } else { p->Table[r][t] += S->seq[s]->wt ; p->SumAA[r] += S->seq[s]->wt ; p->SumSite[t] += S->seq[s]->wt ; } } } } void ComputeProfile(Sequences * S, Profile * p) { int s,r ; for (s=0; s<=S->nseq; s++) { # ifdef OFF fprintf(stderr,"ComputeProfile(): At Sequence %d\n",s); # endif for (r = 1; r <= p->Nsites; r++) { int R = p->SiteInt[ r ] ; int T = (int) S->seq[s]->aa[R]; int t = Lookup(T,p->aaInt); if (t == 0) { STDERR("t==0: You've discovered a new type of Amino Acid."); } else { p->Table[r][t]++ ; p->SumAA[r]++ ; p->SumSite[t]++ ; } } } } nrVectorPtr ConstructComputeEntropy(Profile *p) { int r,t; double ln_2 = log (2.0) ; /* log2(x) = ln(x)/ln_2 */ nrVectorPtr Entropy = dvector(1,p->Nsites); for (r=1;r<=p->Nsites;r++) { Entropy[r] = 0; for (t=1;t<=p->Naa;t++) { double p_r = p->Table[r][t]/p->SumAA[r] ; if (p_r > FLT_MIN ) Entropy[r] -= p_r * log (p_r) / ln_2 ; } # ifdef OFF printf("%7s %9.4f\n",p->SiteName[r],Entropy[r]); # endif } return Entropy ; } void PrintProfileWEntropy(Profile *p, nrVectorPtr Entropy) { int r,t; printf("%12s"," "); printf("%7s ","Entropy"); for (t=1;t<=p->Naa;t++) printf("%3s ",p->aaName[t]); CR; printf("%21s"," "); for (t=1;t<=p->Naa;t++) printf("%1c%2s ", p->aaInt[t]," "); printf("Tot "); CR; for (r=1;r<=p->Nsites;r++) { printf("%5s ",p->SiteName[r]); printf("%4d ",p->SiteInt[r]); printf("%7.4f ",Entropy[r]); for (t=1;t<=p->Naa;t++) printf("%3.0f ",p->Table[r][t]); printf("%3.0f ",p->SumAA[r]); CR; } printf("%11s","Tot"); for (t=1;t<=p->Naa;t++) printf("%3.0f ", p->SumSite[t]," "); CR; }