#!/usr/bin/perl
use CGI qw(:standard);
use DBI;
require aaplot;
require chi;
#------------------MAIN EXECUTABLE SECTION IF CALLED WITH PARAMETERS--------
if(param("plot1") eq "on"){
&getParams();
$usestop = "off";
$count = 20;
if(param("usestop") eq "on"){
$usestop = "on";
$count = 21;
}
my @table = &retrieve($sql_query);
my @sorted = ();
my @distance = ();
#--------------------DISTANCE MATRIX-------------------------------
for $i (0 ... $#table){
$legend[$i] = $table[$i][0] . " " . $table[$i][2] . " " . $table[$i][1];
for $j (0 ... $#table){
if ($i != $j) {
$sum = 0;
for $k (4 ... ($count + 3)) {
$sum += (($table[$i][$k] - $table[$j][$k]) ** 2);
}
$distance[$i][$j] = sqrt($sum);
}
else {
$distance[$i][$j] = 0;
}
}
}
#--------------------TABLE REARRANGEMENT FOR SORTED SETS-----------
if(param("presort") eq "on"){
@sortrow = ();
@keys = ();
# Identify which row in table is the first on the form (i.e. sortby row)
$org = param("org1");
$chr = param("chr1");
$feature = param("type1");
for $i (0 ... $#table){
if(($table[$i][0] eq $org)
&& ($table[$i][1] eq $chr)
&& ($table[$i][2] eq $feature)){
$sortby = $i;
last;
}
}
for $k (4 ... 24){
push(@unsorted, $table[$sortby][$k]);
}
@sorted = sort numerically(@unsorted);
for $k (0 ... 20){
for $n (0 ... 20){
# Notice I put in an extra test; this is necessary
# when two residues have the same composition- which
# to my great surprise is the case with one of the
# pseudogene sets
if(($sorted[$n] == $unsorted[$k])
&& ($keys[$n - 1] != $k)){
$keys[$n] = $k;
last;
}
}
}
}
else{
for $k (0 ... 20){
$keys[$k] = $k;
}
}
#--------------------SPREAD----------------------------------------
for $k (4 ... 24){
$min = 1;
$max = 0;
for $i (0 ... $#table){
$temp[$k - 4] += $table[$i][$k];
if($table[$i][$k] > $max){
$max = $table[$i][$k];
}
if($table[$i][$k] < $min){
$min = $table[$i][$k];
}
}
$nextavg = $temp[$k - 4] / ($#table + 1);
$avg[$k - 4] = sprintf("%1.3lf", $nextavg);
$sum = 0;
for $i (0 ... $#table){
$sum += ($table[$i][$k] - $avg[$k - 4])**2;
}
$newdev = sqrt($sum / ($#table + 1));
$stddev[$k - 4] = sprintf("%1.3lf", $newdev);
$nextspread = 0;
if($avg[$k - 4] > 0){
$nextspread = $newdev / $avg[$k - 4];
}
$spread[$k - 4] = sprintf("%1.3lf", $nextspread);
$nextrange = 0;
if($avg[$k - 4] > 0){
$nextrange = (($max - $min) / $avg[$k - 4]);
}
$range[$k - 4] = sprintf("%1.3lf", $nextrange);
}
###################################################################
#-------------------HTML OUTPUT-----------------------------------#
###################################################################
&cleanLegend;
print "Content-type: text/html\n\n";
print "
Amino Acid Frequencies
";
#---------------STATISTICS TABLE-----------------------------------
&headerRow;
for $i (0 ... $#table){
$type = $table[$i][0];
if($type ne "human" && $type ne "worm" && $type ne "fly" && $type ne "yeast"){
$type = "bact";
}
print "
$legend[$i]
|
$table[$i][3]
| ";
my @stats = ();
my %tdcolors = ();
for $j (4 ... 24){
push(@stats, $table[$i][$j]);
}
$k = 0;
foreach $freq (@listed = sort numerically @stats){
$freq = sprintf("%1.3lf", $freq);
$tdcolors{$freq} = $colors[$k++];
}
for $k (0 ... 20){
$freq = sprintf("%1.3lf", $stats[$keys[$k]]);
print "
$freq
| \n";
}
print "
";
}
#------------------Printout of average and spread rows-------------------
#Mean
$k = 0;
foreach $average (@listed = sort numerically @avg){
$tdcolors{$average} = $colors[$k++];
}
print "
Mean
|
  | ";
for $k (0 ... 20){
print "
$avg[$keys[$k]]
| \n";
}
#Standard deviation
$k = 0;
foreach $devval (@listed = sort numerically @stddev){
$tdcolors{$devval} = $colors[$k++];
}
print "
Std. Dev.
|
  | \n\n";
for $k (0 ... 20){
print "
$stddev[$keys[$k]]
| \n";
}
print "\n";
# Scaled deviation (spread)
$k = 0;
foreach $spreadval (@listed = sort numerically @spread){
$tdcolors{$spreadval} = $colors[$k++];
}
print "
stddev / mean
|
  | \n";
for $k (0 ... 20){
print "
$spread[$keys[$k]]
| \n";
}
print "\n";
# Scaled range
$k = 0;
foreach $rangeval (@listed = sort numerically @range){
$tdcolors{$rangeval} = $colors[$k++];
}
print "
range / mean
|
  | \n";
for $k (0 ... 20){
print "
$range[$keys[$k]]
| \n";
}
print "\n";
#--------------END OF RESIDUE DATA TABLE-------------------------
# Distance matrix display
print "
Distance Matrix |
  | \n";
for $i (0 ... $#table){
print "
$legend[$i]
| \n";
}
print " \n";
#Distance matrix
for $i (0 ... $#table){
print "
$legend[$i]
| \n";
for $j (0 ... $#table){
if ($distance[$i][$j] == 0) {
$d = "-";
}
else {
$d = (sprintf("%1.4f", $distance[$i][$j]) * 100) . "%";
}
print "
$d | \n";
}
print "\n";
}
#Call other scripts to draw images
print " \n |
";
if(param("spreadsort") eq "on"){
print "
";
}
elsif(param("presort") eq "on"){
print "
";
}
else{
print "
";
}
print "
";
}
#------------------NO PARAMETERS (default)----------------------------------
elsif( !(param()) || !(param("plot1"))){
&defaultPage;
}
#------------------SUBROUTINES----------------------------------------------
sub numerically { $b <=> $a; }
sub cleanLegend {
for $i (0 ... $#legend){
$legend[$i] =~ s/\ gene/\ genes/;
$legend[$i] =~ s/\ pseudo/\ pseudogenes/;
$legend[$i] =~ s/\ homol/\ homologies/;
$legend[$i] =~ s/\ chr/\ chromosomes/;
if($legend[$i] =~ /\ all/){
$legend[$i] =~ s/\ all//;
$legend[$i] .= " (all chrs.)";
}
else{
$chrnum = $legend[$i];
$legend[$i] =~ s/\ \w*$//;
$chrnum =~ s/.*\ //g;
$chrnum = "Chr. $chrnum";
$legend[$i] .= ", $chrnum";
}
}
}
# Query parsing section
sub getParams {
$sql_query = "SELECT * FROM aa WHERE ";
if(param("plot1") eq "on"){
$org1 = param("org1");
$chr1 = param("chr1");
$type1 = param("type1");
$query_string .= "&plot1=" . param("org1") . "." . param("chr1") . "." . param("type1");
$sql_query .= " ( org = '$org1' AND feature = '$type1' AND chromosome = '$chr1')";
}
if(param("plot2") eq "on"){
$org2 = param("org2");
$chr2 = param("chr2");
$type2 = param("type2");
$query_string .= "&plot2=" . param("org2") . "." . param("chr2") . "." . param("type2");
$sql_query .= " OR ( org = '$org2' AND feature = '$type2' AND chromosome = '$chr2')";
}
if(param("plot3") eq "on"){
$org3 = param("org3");
$chr3 = param("chr3");
$type3 = param("type3");
$query_string .= "&plot3=" . param("org3") . "." . param("chr3") . "." . param("type3");
$sql_query .= " OR ( org = '$org3' AND feature = '$type3' AND chromosome = '$chr3')";
}
if(param("plot4") eq "on"){
$org4 = param("org4");
$chr4 = param("chr4");
$type4 = param("type4");
$query_string .= "&plot4=" . param("org4") . "." . param("chr4") . "." . param("type4");
$sql_query .= " OR ( org = '$org4' AND feature = '$type4' AND chromosome = '$chr4')";
}
if(param("plot5") eq "on"){
$chr5 = param("chr5");
$org5 = param("org5");
$type5 = param("type5");
$query_string .= "&plot5=" . param("org5") . "." . param("chr5") . "." . param("type5");
$sql_query .= " OR ( org = '$org5' AND feature = '$type5' AND chromosome = '$chr5')";
}
if(param("plot6") eq "on"){
$org6 = param("org6");
$chr6 = param("chr6");
$type6 = param("type6");
$query_string .= "&plot6=" . param("org6") . "." . param("chr6") . "." . param("type6");
$sql_query .= " OR ( org = '$org6' AND feature = '$type6' AND chromosome = '$chr6')";
}
if(!($size = param("size"))){
$size = 20;
}
$query_string .= "&size=$size";
}
sub headerRow {
if(param("presort") eq "on"){
print "Plotted data (sorted by $org1 $type1, chr. $chr1)
";
}
else{
print "Plotted data (unsorted)
";
}
print "
Feature |
Residues |
$residues[$keys[0]] |
$residues[$keys[1]] |
$residues[$keys[2]] |
$residues[$keys[3]] |
$residues[$keys[4]] |
$residues[$keys[5]] |
$residues[$keys[6]] |
$residues[$keys[7]] |
$residues[$keys[8]] |
$residues[$keys[9]] |
$residues[$keys[10]] |
$residues[$keys[11]] |
$residues[$keys[12]] |
$residues[$keys[13]] |
$residues[$keys[14]] |
$residues[$keys[15]] |
$residues[$keys[16]] |
$residues[$keys[17]] |
$residues[$keys[18]] |
$residues[$keys[19]] |
$residues[$keys[20]] |
";
}
# Index page, displayed when executed without parameters
sub defaultPage {
print header(), start_html("Comparative Residue Frequency");
print body({-bgcolor=>'white', -link=>'#004600', -vlink=>'#7A0000'},
h2("Amino Acid Composition Statistics for Completed Genomes"),
h4("[ Sequence and database dumps available here ]"),
h4("Select features to plot: [ Alternate human-only browser ]"),
start_form({-method=>'POST'}),
table({-bgcolor=>'#E2E2E2', -cellspacing=>'2', -cellpadding=>'2', -cols=>'2', -width=>'820', -border=>'0'},
# Careful! The layout is 1,4,2,5,3,6 as printed here.
Tr(td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot1', -value=>'on'}, b("1.")),
b("Organism: "),
popup_menu(-name=>'org1',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'human'),
b("Chromosome: "),
popup_menu(-name=>'chr1',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type1',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
)),
td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot4', -value=>'on'}, b("4.")),
b("Organism: "),
popup_menu(-name=>'org4',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'yeast'),
b("Chromosome: "),
popup_menu(-name=>'chr4',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type4',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
))),
Tr(td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot2', -value=>'on'}, b("2.")),
b("Organism: "),
popup_menu(-name=>'org2',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'fly'),
b("Chromosome: "),
popup_menu(-name=>'chr2',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type2',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
)),
td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot5', -value=>'on'}, b("5.")),
b("Organism: "),
popup_menu(-name=>'org5',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'yeast'),
b("Chromosome: "),
popup_menu(-name=>'chr5',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type5',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
))),
Tr(td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot3', -value=>'on'}, b("3.")),
b("Organism: "),
popup_menu(-name=>'org3',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'worm'),
b("Chromosome: "),
popup_menu(-name=>'chr3',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type3',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
)),
td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'1'},
input({-type=>'checkbox', -name=>'plot6', -value=>'on'}, b("6.")),
b("Organism: "),
popup_menu(-name=>'org6',
-values=>['human', 'fly', 'worm', 'yeast', '--', 'Mlep',
'aero','aful','aquae','bbur','bhal','bsub',
'buch','cjej','cpneu','ctra','dra','ecoli',
'hbsp','hinf','hpyl','llact','mgen','mjan',
'mpneu','mthe','mtub','nmen','pabyssi',
'paer','pmul','pyro','rpxx','synecho',
'tacid','tmar','tpal','uure','vcho','xfas'],
-default=>'yeast'),
b("Chromosome: "),
popup_menu(-name=>'chr6',
-values=>['1','2','3','4','5','6','--','7','8',
'9','10','11','12','13','14','15','16',
'--','21','22','--','2L','2R','3L','3R',
'4','--','X', 'all'],
-default=>'all'),
b("Feature: "),
popup_menu(-name=>'type6',
-values=>['chr', 'gene',
'homol', 'pseudo'],
-default=>'gene'),
))),
Tr(td({-colspan=>'2'},
font({-face=>'arial, Helvetica, sans-serif', -size=>'2'},
"Plot Size: ",
popup_menu(-name=>'size',
-values=>['20', '25', '30', '40'],
-labels=>{'20'=>'small (460x300)',
'25'=>'medium (565x360)',
'30'=>'large (670x420)',
'40'=>'HUGE (880x540)'},
-default=>'460x300'),
input({-type=>'checkbox', -value=>'on', -name=>'presort'},
b("Sort frequencies by first set")),
input({-type=>'checkbox', -value=>'on', -name=>'usestop'},
b("Use stop codons in dist. matrix")),
submit("Plot Values"), reset(),
br(),
blockquote(
b("Example with default selection:"), br(),
img({-src=>'./chart.cgi', -border=>'0'})))))),
hr(),
table({-width=>'640'},
Tr(td(
font({-face=>'arial, Helvetica, sans-serif', -size=>'2'},
"Not all chromosomes and features can be used with all organisms.",
"Until I get a better knowledge of JavaScript, this page will detect",
"type mismatches manually.")))));
print end_html();
}