# # read Sib-pair output and create a pretty HTML document that # summarises it with links to detailed tables # AWK=awk if ! [ -f $1 ] || [ "$1" = "" ] then echo "sp2html: summarise Sib-pair output file as a HTML document." exit fi # # Summary # $AWK 'BEGIN { comment="[!#]" first_comment=0 # flag whether writing out contents of first comment prodrome=1 # reading preliminary data nfreq=0 # number of allele frequency tabulations nanal=6 # number of possible analyses analname[1]="APM" analname[2]="GPM" analname[3]="H-E" analname[4]="TDT" analname[5]="Ass" analname[6]="ASP" header="|||| SIB-PAIR: A program for simple genetic analysis" } NR==1 && $0!=header { print "This does not seem to be output from Sib-pair!" print "Exiting." exit } # # Initialize the HTML output file # NR==1 { getline version=$NF getline getline daterun=substr($0,16) print "" print "" print "Summary of Sib-pair output "" \ FILENAME """ print "" print "" print " " print "

Summary and full text versions of Sib-pair (v" \
        version ") output file "" \
        FILENAME ""

" }
#
# Copy the text of the first comment
#
# first_comment: 0 = not started, 1 = inside the first comment, 2 = finished.
first_comment<2 {
  if (substr($1,1,1) ~ comment) {
    # Open the section on the first comment line longer than two characters.
    # length() has to wrap $0 alone: length($0>2) measures the 0/1 result of
    # the comparison, which is always one character long and so always true.
    if (first_comment==0 && length($0)>2) {
      first_comment=1
      printf "\n

Text of first comment

\n\n
\n"
                }
                print $0
              }else{if (substr($1,1,1) !~ comment && first_comment==1) {
                first_comment=2
                print "
" }} } $1=="Pedigree" && $2=="file" { pedfile=$NF } substr($0,1,25)=="Total number of pedigrees" { numped=$NF } substr($0,1,24)=="Total number of subjects" && prodrome { prodrome=0 numrec=$NF print " " print "

Description of run

" print " " print "" print "
Date run" daterun print "
Pedigree file"" pedfile """ print "
No. of Individuals" numrec print "
No. of Pedigrees" numped print "
" print " " print "

Loci included in analysis

" print " " print "

" print "" print "
Locus nameLocus Type" ntrait=0 nmark=0 for (i=1;i<=numloc;i++) { if (loctyp[i]=="m") { nmark++ markername[nmark]=locnam[i] typ="Marker (codominant)" }else{if (loctyp[i]=="a") { ntrait++ traitname[ntrait]=locnam[i] typ="Binary trait" }else{if (loctyp[i]=="q") { ntrait++ traitname[ntrait]=locnam[i] typ="Quantitative trait" }else{ typ=" " }}} print "
" locnam[i] "" typ } print "
" } # # Tabulate the loci # $0=="Locus Type Position" { getline getline numloc=0 while (NF>0) { numloc++ locnam[numloc]=$1 loctyp[numloc]=$2 getline } } # # Interpret the results # $1=="Allele" && $2=="frequencies" { typ="Fre" gsub(/\"/," "); nfreq++ frename[nfreq]=$NF getline getline getline minall[nfreq]=$1 while (NF>0) { maxall[nfreq]=$1 getline } getline numalleles[nfreq]=$NF getline het[nfreq]=$NF getline getline numtyped[nfreq]=$5 } $1=="Hardy-Weinberg" && $2=="equilibrium" { typ="HWE" gsub(/\"/," "); mrker=$NF } $1=="APM" { has_res[1]=1 typ="APM"; gsub(/\"/," "); trait=$4; mrker=$8 $9; aff=0; gpm=0 } $1=="Aff-Aff" && typ=="APM" {aff=1} $1=="Aff-UnA" && typ=="APM" {aff=0} $1=="GPM" && typ=="APM" { has_res[2]=1; gpm=1} $1=="H-E" { gsub(/\"/," "); has_res[3]=1; typ="H-E"; trait=$4; mrker=$7 } $1=="TDT" { gsub(/\"/," "); has_res[4]=1; typ="TDT"; trait=$4; mrker=$7 } $1=="Assoc" { gsub(/\"/," "); has_res[5]=1; typ="Ass"; trait=$4; mrker=$7 } $2=="ASP" { gsub(/\"/," "); has_res[6]=1; typ="ASP"; trait=$4; mrker=$7 } $1=="P-value=" { if($2<0.001){flag="+++" }else{if($2<0.01){flag="++" }else{if($2<0.05){flag="+" }else{flag="0" }}} pval=$2 getline if($1=="Empiric"){ empval=$NF if(empval<0.001){flag="+++" }else{if(empval<0.01){flag="++" }else{if(empval<0.05){flag="+" }}} } result[trait,typ,mrker]=flag } typ=="Ass" && $1=="Equalled" { empval=$NF gsub("[()]","",empval) if(empval<0.001){flag="+++" }else{if(empval<0.01){flag="++" }else{if(empval<0.05){flag="+" }else{flag="0" }}} result[trait,typ,mrker]=flag } typ=="Ass" && $2=="P-value" { if($4<0.001){flag="+++" }else{if($4<0.01){flag="++" }else{if($4<0.05){flag="+" }else{flag="0" }}} pval=$4 getline if($1=="Equalled"){ empval=$NF gsub("[()]","",empval) if(empval<0.001){flag="+++" }else{if(empval<0.01){flag="++" }else{if(empval<0.05){flag="+" }}} } result[trait,typ,mrker]=flag } typ=="H-E" && $1=="t" && $2=="value" { pval=$NF gsub("[()]","",pval) gsub("P=","",pval) if(pval<0.001){flag="+++" }else{if(pval<0.01){flag="++" 
}else{if(pval<0.05){flag="+" }else{flag="0" }}} result[trait,typ,mrker]=flag } typ=="APM" && $1=="f(p)" && $3=="1/sqrt(p)" && aff { pval=$6 if(pval<0.001){flag="+++" }else{if(pval<0.01){flag="++" }else{if(pval<0.05){flag="+" }else{flag="0" }}} result[trait,typ,mrker]=flag } typ=="APM" && $1=="f(p)" && $3=="1/sqrt(p)" && gpm { pval=$6 if(pval<0.001){flag="+++" }else{if(pval<0.01){flag="++" }else{if(pval<0.05){flag="+" }else{flag="0" }}} result[trait,"GPM",mrker]=flag } typ=="ASP" && $1=="Full-sibs" { if ($6>$7) { fchar="+" }else{ fchar="-" } } typ=="ASP" && $1=="Full-Sib" { pval=0+substr($NF,length($NF)-5,5) if(pval<0.001){flag=" " fchar fchar fchar }else{if(pval<0.01){flag=" " fchar fchar }else{if(pval<0.05){flag=" " fchar }else{flag="0" }}} result[trait,typ,mrker]=flag } typ=="HWE" && $2=="P-value" && $1=="Nominal" { pval=$4 if(pval<0.001){flag="+++" }else{if(pval<0.01){flag="++" }else{if(pval<0.05){flag="+" }else{flag="0" }}} hwe_pvalue[mrker]=flag } typ=="HWE" && $1=="Equalled" { empval=$NF gsub("[()]","",empval) if(empval<0.001){flag="+++" }else{if(empval<0.01){flag="++" }else{if(empval<0.05){flag="+" }else{flag="0" }}} hwe_pvalue[mrker]=flag } # # Write out the results # END { print " " print "

Results

" printf "\n

\n" \ "
\n" \
"Coding for statistical significance\n
CodeMeaning\n" \ "
+++" \ "P < 0.001\n" \ "
++" \ "0.001 < P < 0.01\n" \ "
+" \ "0.01 < P < 0.05\n" \ "
0" \ "0.05 < P\n" \ "
." \ "Test missing\n" \ "
\n\n" # # First, allele frequencies # if (nfreq>0) { print "

Marker analyses

" printf "

\n\n
Marker" \ "Persons typedNo. of alleles" \ "Allele (size) rangeHeterozygosityHWE test" for(k=1;k<=nfreq;k++) { ch=hwe_pvalue[frename[k]] if (ch=="") {ch="."} print "
" frename[k] "" \ "" numtyped[k] \ "" numalleles[k] \ "" minall[k] "--" maxall[k] \ "" het[k] \ "" ch } printf "
\n\n" } # # Then trait analyses # print "

Trait analyses

" print " " for(i=1;i<=ntrait;i++) { printf "

\n\n" \ "\n" \ "" for(k=1;k<=nanal;k++) { if (has_res[k]) { print "
Trait: " \ """ traitname[i] ""
Marker" \ "Test\n
" \ analname[k] "" }else{ print "" analname[k] } } for(j=1;j<=nmark;j++) { printf "
" markername[j] for(k=1;k<=nanal;k++) { ch=result[traitname[i],analname[k],markername[j]] if (ch=="") { ch="." } printf "" ch } printf "\n" } print "
" } }' $1 # # Second pass to put in the detail # $AWK 'BEGIN { comment="[!#]" in_comment=0 # within a comment in_table=0 # within any table freq_table=0 # printing table of allele frequencies apm_table=0 # printing table of APM sharing ibs_table=0 # printing table of ibs ASP sharing tdt_table=0 # printing table of TDT results first_freq=1 # start of set of tables of allele freqs first_apm=1 # start of set of tables of APM results first_ibs=1 # start of set of tables of ASP results first_tdt=1 # start of set of tables of TDT results last_unfo=0 # last line of unformatted output header="|||| SIB-PAIR: A program for simple genetic analysis" typloc["a"]="aff" typloc["q"]="qua" typloc["m"]="mar" printf "\n

Complete output

\n\n

\n" } # # Remove reserved symbols # { gsub("&","\\\&",$0) gsub("<","\\\<",$0) gsub(">","\\\>",$0) gsub("\"","\\\"",$0) # # Tidy up completed blocks # if (in_table && $0 !~ /=/) { in_table=0 printf "\n\n" }else{if (in_comment && substr($1,1,1) !~ comment) { in_comment=0 print "" }} # # Perform action appropriate to current line # if ($0==header) { print "


" print "" substr($0,1,4) "" substr($0,5) "
" getline print "" substr($0,1,4) "" substr($0,5) "
" getline print "" substr($0,1,4) "" substr($0,5) "
" getline print "" substr($0,1,4) "" substr($0,5) "
" print "
" getline # # A comment # }else{if (substr($1,1,1) ~ comment) { if (!in_comment) { in_comment=1 print "

" print "

"
                }
                print $0
#
# A warning or informative message
#
              }else{if ($0 ~ /ERROR:/) {
                printf "

\nERROR: " substr($0,6) "\n" getline while (length($0)>0 && $1!="Pedigree:") { print $0 getline } print "\n

\n" }else{if ($0 ~ /NOTE:/) { printf "

\nNOTE: " substr($0,6) "\n" getline while (length($0)>0 && $1!="Pedigree:" && $0 !~/NOTE/) { print $0 getline } print "\n

\n"
#
# The title of the current analysis
#
              }else{if ($0 ~ /-------------------------------/) {
                # The title is the line that follows the dashed rule.  It is
                # classified here so that the first title of each kind of
                # analysis can be printed by its own branch below.
                getline
                freq_table=($1=="Allele" && $2=="frequencies")
                # apm_table used to be initialised and tested but never
                # assigned, which left the first_apm branch unreachable.
                apm_table=($1=="APM")
                ibs_table=($1=="IBS" && $2=="ASP")
                tdt_table=($1=="TDT")
                if (freq_table && first_freq) {
                  first_freq=0
                  printf "

" $0 "

" }else{if (apm_table && first_apm) { first_apm=0 printf "

" $0 "

" }else{if (tdt_table && first_tdt) { first_tdt=0 printf "

" $0 "

" }else{if (ibs_table && first_ibs) { first_ibs=0 printf "

" $0 "

" }else{ print "

" $0 "

" }}}} getline # # List of dyads variable = value. Formatted as a table. # }else{if ($0 ~ /=/) { if (!in_table) { in_table=1 printf "

\n\n" } pos=index($0,"=") printf "
" substr($0,1,pos-1) \ "" substr($0,pos) "\n" # # List of loci # }else{if ($0=="Locus Type Position") { printf "\n" \ "

\n\n" \ "
LocusTypePosition\n" getline getline while(NF>2) { print "
" $1 "" typloc[$2] \ "" $3 $4 getline } printf "
\n\n" # # table of allele-by-allele TDT results # }else{if ($0 ~ /Allele by Allele TDT/) { getline getline getline printf "

\n\n\n" \ "
\n" \ "Allele by Allele TDT
AlleleTransNot TrTDTP-value\n" while (NF==5) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 \ "" $5 getline } printf "
\n\n" # # table of genotypic TDT results # }else{if ($0 ~ /Marginal Genotypic TDT/) { getline getline getline printf "

\n\n\n" \ "
\n" \ "Marginal Genotypic TDT\n
GenotypeTransNot Tr\n" while (NF==3) { print "
" $1 \ "" $2 \ "" $3 getline } printf "
\n\n" # # table of global allelic TDT results # }else{if ($0 ~ /Global Allelic TDT/) { getline getline getline printf "

\n\n\n" \ "
\n" \ "Global Allelic TDT
All 1All 2Tr=1Tr=2TDT\n" while ($1>0) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 \ "" $5 getline } printf "
\n\n" # # table of ibs ASP sharing # }else{if (ibs_table) { printf "

\n\n
" \ "No. of Pairs" \ "IBS Sharing" \ "Mean IBS sharing\n" \ "
2/21/20/2ObsExp\n" getline getline getline while(NF==7) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 \ "" $5 \ "" $6 \ "" $7 "\n" getline } printf "
\n\n" ibs_table=0 # # table of allele frequencies # }else{if (freq_table) { printf "

\n\n" \ "
AlleleFrequencyCountHistogram" getline while(NF>0) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 getline } printf "
\n\n" freq_table=0 # # Detailed HWE table # }else{if ($2=="Observed" && $3=="Genotypes") { printf "

\n\n\n"\ "
\n" \ "Observed genotype counts\n" \ "
GenotypeObservedExpectedDeviate\n" getline getline getline while (NF>1) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 getline } getline printf "
Total" \ "" $2 "\n
\n\n" # # tables of segregation ratios and recurrence risks # }else{if ($0=="Mating Type UxU UxA AxA" || \ $0=="Relative pair RecRisk Aff-Aff Aff-UnA" || \ $0=="Total sample All Fndrs Nonfndrs") { printf "

\n\n
" $1 " " $2 \ "" $3 "" $4 "" $5 "\n" getline getline while (length($0)>0) { print "
" substr($0,1,11) \ "" substr($0,12,9) \ "" substr($0,21,9) \ "" substr($0,30,9) getline } printf "
\n\n" # # Marker Homozygosity test # }else{if ($1=="Marker" && $2=="N") { printf "

\n\n" \ "\n" \ "
MarkerN" \ "ObsExpFZEmp PIter\n" getline getline while (NF>1) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 \ "" $5 \ "" $6 \ "" $7 \ "" $8 getline } printf "
\n\n" # # Kaplan-Meier table # }else{if ($1=="Age-at-onset" && $2=="Failed") { printf "

\n\n" \ "
Age-at-onsetFailed" \ "RisksetH(t)S(t)ase" getline getline while(NF==6) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 \ "" $5 \ "" $6 getline } printf "
\n\n" # # Quantitative trait descriptive stats # }else{if ($1=="Descriptive" && $2=="Stats") { printf "

\n\n\n
\n" \ "Quantitative trait descriptive statistics\n" \ "
Statistic" \ "AllFoundersNonfounders\n" getline getline while (NF>0) { print "
" substr($0,1,12) \ "" substr($0,15,13) \ "" substr($0,28,13) \ "" substr($0,41,13) getline } printf "
\n\n" }else{if ($2=="Familial" && $3=="correlations") { printf "

\n\n\n
\n" \ "Familial correlations (pairwise)\n" \ "
Rel 1Rel 2" \ "Variance 1Variance 2Correlation" \ "N Pairs\n" getline getline getline while (NF>0) { print "
" $1 "" $2 \ "" substr($0,15,13) \ "" substr($0,28,13) \ "" substr($0,41,13) \ "" $NF getline } printf "
\n\n" }else{if ($1=="Fain" && $2=="sibship") { printf "

\n\n\n" getline getline while (length($0)>0) { print "
\n" \ "Fain sibship variance test\n
" substr($0,1,14) \ "" substr($0,15) getline } printf "
\n\n" # # List of pedigrees # }else{if ($1=="Pedigree:" && $3=="No.") { printf "

\n\n
PedigreeNo. members" \ "No. foundersNo. sibships\n" while ($1=="Pedigree:") { print "
" $2 "" $5 "" $8 "" $11 getline } printf "
\n\n" # # QTL Anova # }else{if ($2=="QTL" && $3=="Association") { printf "

\n\n\n
\n" \ "QTL Association Analysis\n" \ "
Allele" \ "Allelic MeanStand ErrorCount\n" getline getline getline while (NF==4) { print "
" $1 \ "" $2 \ "" $3 \ "" $4 getline } printf "
\n\n" }else{if ($2=="Association" && $3=="Analysis") { printf "

\n\n\n
\n" \ "Binary Trait Association Analysis\n" \ "
Allele" \ "AffectedUnaffectedTotalDev\n" getline getline getline while (NF>1) { print "
" $1 \ "" $2 " " $3 \ "" $4 " " $5 \ "" $6 \ "" $7 getline } getline print "
Total" \ "" $2 \ "" $3 \ "" $4 printf "
\n\n" }else{if ($1=="->") { $1="" printf "

\n" \ "[CMD: " $0 " ]\n" \ "

\n" # # A pedigree drawing either single locus -- of errors, or multilocus -- of # haplotypes # }else{if ($1=="Sibship:") { print "

" print $0 print "

" getline while (NF!=2) { if (NF>0) { print "" $0 "
" }else{ print " " } getline } print "

"
                while (NF>0) {
                  print $0
                  getline
                }
                print "
" }else{if ($1=="This" && $2=="job" && $3=="took") { printf "
\n" $0 "\n
\n"
#
# Fallback: any other non-empty line is copied through as unformatted text.
# last_unfo holds the record number of the previous unformatted line; the
# leading literal is printed only when that line was not the one just before.
#
              }else{if (length($0)>0) {
                if (last_unfo!=(NR-1)) {
                  print "

"
                }
                last_unfo=NR
                print $0 "
"
              }}}}}}}}}}}}}}}}}}}}}}}}}}
}
END {
  printf "\n\n"
}' "$1"