#!/usr/bin/perl #this script takes a *.pdb file as the first argument #figures out volume and mw and psvol based on #method laid out in Harpaz et al, Structure, 15July94, pp641-649. # editted from mk_char_seq_prot.pl # 31Aug98 # will now differentiate between sequence file and not # form :: calc-psvol.pl [args val] *.(pdb/fasta) # 8Sep98 # giving up on trying to get it to agree with previous paper # will use pdb, to get correct disulfide data # order: # 1. finds disulfides from header # 2. counts amino acids # 3. figures mw and vol from # 4. adjusts for disulfides and/or ends # 5. outputs # subroutine to return variable after command line argument # usage: $stt = varafteroption("stt", 1.00, "start oxygen radii at "); sub varafteroption { my $changed=0; for($ii=0;$ii<@ARGV;$ii++) { if($ARGV[$ii] eq $_[0]) { $var = $ARGV[$ii+1]; $changed++; print STDERR "NEW $_[2]($_[0]) $var\n"; last; } } if(!$changed) { $var = $_[1]; print STDERR "DEFAULT $_[2]($_[0]) $var\n"; } return $var; } $volfile = varafteroption(vol,"CHC-res-vol", "volumes from file "); open(VOL,"$volfile") || die "couldn't open $volfile\n$!\n"; while() { next if /res/ || /#/; chop; @tt=split' '; $vol{$tt[0]}=$tt[1]; $svol{$tt[2]}=$tt[1]; } close(VOL); $mwfile = varafteroption(mw,"MW-res", "molecular weights from file "); open(MW,"$mwfile") || die "couldn't open $mwfile\n$!\n"; while() { next if /res/ || /#/; chop; @tt=split' '; $mw{$tt[0]}=$tt[1]; $smw{$tt[2]}=$tt[1]; } close(MW); $doss = varafteroption(ss, 1, "doing ss "); if (@ARGV>0) { $nm=$ARGV[$#ARGV]; } else { die "need single argument :: calc-psvol.pl [variables] filename\n"; } #pdb file: #ATOM 0 N LYS A 1 3.240 10.040 10.380 1.00 -0.31 #ATOM 1 CA LYS A 1 2.390 10.410 9.250 1.00 -0.20 $nres=0; $nlys=0; $narg=0; $nglu=0; $nasp=0; $tmw=0; $tvol=0; $chains=0; $nss=0; if ($nm =~ /pdb/) { $pdbfile = $nm; open(PDB,$pdbfile) || die "couldn't open $pdbfile\n$!\n"; while ($line=) { chop($line); @in=split(' ',$line); if($in[0] eq "SSBOND" && $doss) { $nss++; if($in[3]=~/\D+/) { $chains=1; $ssc{$in[3]}[$in[4]]=1; $ssc{$in[6]}[$in[7]]=1; } else { $ss[$in[3]]=1; $ss[$in[5]]=1; } } elsif($in[0] eq "ATOM" && $line =~ "CA") { $res=$in[3]; if($res eq "CYS" && $doss) { if($chains) { if($ssc{$in[4]}[$in[5]]==1) { $res="CSS"; } } else { if($ss[$in[4]]==1) { $res="CSS"; } } # printf STDERR "$res $ss[$in[4]] $line\n"; } $res[$nres]=$res; $nres++; if($res eq "LYS") { $nlys++; } elsif($res eq "ARG") { $narg++; } elsif($res eq "GLU") { $nglu++; } elsif($res eq "ASP") { $nasp++; } $tmw+=$mw{$res}; $tvol+=$vol{$res}; } } close(PDB); } elsif ($nm =~ /fasta/) { open(OO,$nm) || die "couldn't open $pdbfile\n$!\n"; while () { if (/^>/ .. /^$/) { next if /^>/ || /^$/; chop; @in=split/ */; foreach $a (@in) { $tmw+=$smw{$a}; $tvol+=$svol{$a}; if($a eq "K") { $nlys++; } elsif($a eq "R") { $narg++; } elsif($a eq "D") { $nglu++; } elsif($a eq "E") { $nasp++; } $nres++; } } } close(OO); } # add extra oxygen at c-terminus and extra a n terminus if(varafteroption("tt", 1, "including termini weights ")) { $tmw+=(16+1); } if(varafteroption("acetyl", 0, "including termini weights ")) { $tmw+=(24+16+3); } $ex=varafteroption("ex", 0, "extra info "); # calculate psvol # take 10 off for every carboxyl oxygen (two per asp and glu and two for end) # take 18 off for amino and guanadino (include end) if($doss) {$tmw-=$nss*2;} printf "%15s res %d ss %d v %.2lf w %.2lf ", $nm, $nres, $nss, $tvol, $tmw; $tvol=$tvol-(2*10*($nasp+$nglu+1))-(18*($nlys+$narg)); printf "mv %.2lf ", $tvol; if($ex) { printf "- D %d E %d _ K %d R %d ", $nasp, $nglu, $nlys, $narg; } $psvol=0.6023*$tvol/$tmw; printf "psv %.3lf\n", $psvol; __END__