#!/bin/sh
#
# mergeped 20021213 David L Duffy
#
# Read one or more sib-pair pedigree files and corresponding scripts
# and merge these into a single pedigree file and script
#
#
# > mergeped script1 script2
#
# script1            script2            --> mergeped.in
# ==========         ===========            ===========
# set loc a qua      set loc a qua          set loc a qua
# read ped pedfile1  read ped pedfile2      set loc a_v2 qua
#                                           read pedigree mergeped.ped
#
# pedfile1           pedfile2           --> mergeped.ped
# ===========        ===========            ============
# 1 1 x x x 1        1 1 x x m 1            1 1 x x m 1 1
# 1 2 x x f 2        1 2 x x 2 2            1 2 x x f 2 2
# 1 3 1 2 m 2        1 3 1 3 m 2            1 3 1 2 m 2 2
#                    1 4 1 3 m 2            1 4 1 3 m x 2
#
# mergeped.err
# ============
# pedfile2: Mothers of 1-3 (2 and 3) do not match master!
#
# Model:
#   Most individuals are present in more than one pedigree file, but are
#   allowed to be absent in all but one.
#
#   Extra records with the same pedigree and individual ID are deleted.
#
#   Where pedigree information about an individual differs between
#   pedigree files, missing data (ie parents or sex in master record)
#   is updated, but incongruent data (ie different parents or sex)
#   is flagged.
#
#   Loci may be present in more than one pedigree file,
#   and loci of the same name in different scripts are assumed to be
#   different loci, and will be appropriately relabelled.
#
#   Loci in "mergeped.in" are ordered according to their appearance in the
#   original scripts.
#
# Options:
#   -m | --master   take the list of individuals from the first pedigree
#                   file only, instead of the union of all pedigree files.
#
# External helpers "pad" and "bester" must be on the PATH; they are not
# part of this file (presumably "pad" fills short records out to a common
# number of fields -- confirm against the helper itself).
#

AWK=/usr/bin/gawk
ERRLOG="mergeped.err"
SCRIPT="mergeped.in"
PEDFILE="mergeped.ped"
PEDLIST=peds$$
IDLIST=idlist$$
TMPPED=tmp.ped$$
PARTPED=part2.ped$$

#
# Parse the command line.
#
# The positional parameters are rewritten in place: every argument is
# shifted off the front and only the script names are appended again, so
# that afterwards "$@" holds exactly the scripts, wherever -m appeared.
# (The "for" word list is expanded once, before the first iteration, so
# changing "$@" inside the loop does not disturb the iteration.)
#
MASTER=0
HELP=0
if [ $# -lt 2 ]
then
  HELP=1
else
  for i in "$@"
  do
    shift
    if [ "$i" = "-m" ] || [ "$i" = "--master" ]
    then
      MASTER=1
    else
      if [ ! -f "$i" ]
      then
        HELP=1
        echo "ERROR: $i does not exist"
        echo
      fi
      set -- "$@" "$i"
    fi
  done
fi

# NOTE(review): the example tables printed below appear to have lost their
# column alignment; the strings are kept exactly as found -- confirm
# against an upstream copy before re-aligning them.
if [ $HELP = 1 ]
then
  echo "Usage: mergeped [-m|--master] ..."
  echo "Merge sib-pair type pedigree files and scripts"
  echo ""
  echo "eg mergeped script1 script2"
  echo ""
  echo "script1 script2 --> mergeped.in"
  echo "========== =========== ==========="
  echo "set loc a qua set loc a qua set loc a qua"
  echo "read ped pedfile1 read ped pedfile2 set loc a_v2 qua"
  echo " read pedigree mergeped.ped"
  echo ""
  echo "pedfile1 pedfile2 --> mergeped.ped"
  echo "=========== =========== ============"
  echo "1 1 x x x 1 1 1 x x m 1 1 1 x x m 1 1 "
  echo "1 2 x x f 2 1 2 x x 2 2 1 2 x x f 2 2 "
  echo "1 3 1 2 m 2 1 3 1 3 m 2 1 3 1 2 m 2 2 "
  echo " 1 4 1 3 m 2 1 4 1 3 m x 2 "
  echo "mergeped.err"
  echo "============"
  echo "pedfile2: Mothers of 1-3 (2 and 3) do not match!"
  exit
fi

# Remove the scratch files however the script ends; a signal is turned
# into a normal exit so that the exit trap also runs in that case.
trap 'rm -f "$TMPPED" "$PARTPED" "$IDLIST" "$PEDLIST"' 0
trap 'exit 1' 1 2 15

#
# Merge the scripts
#
# Comment lines are copied through.  "set loc" commands seen before a
# script's "run" are copied, with the locus name suffixed _v2, _v3, ...
# when the same name has already been used.  The file named on each
# "read ped..."/"read lin..." command is recorded in $PEDLIST, and a single
# "read pedigree" for the merged file is written at the end.
#
$AWK -v pedfile="$PEDFILE" -v pedlist="$PEDLIST" '
BEGIN { thisfile="" }
FILENAME!=thisfile {
  thisfile=FILENAME
  run=0
}
substr($1,1,1) ~ /[!#]/ {
  print $0
}
$1=="run" {
  run=1
}
$1=="set" && substr($2,1,3)=="loc" && run==0 {
  used[$3]++
  ver=""
  if (used[$3]!=1) ver="_v" used[$3]
  $3=$3 ver
  print $0
}
substr($1,1,3)=="rea" && (substr($2,1,3)=="ped" || substr($2,1,3)=="lin") {
  print $3 > pedlist
}
END {
  print "read pedigree " pedfile
  print "run"
  print " "
}' "$@" > "$SCRIPT"

# Without at least one pedigree file there is nothing to merge; stop here
# rather than let every later step fail on the missing list.
if [ ! -s "$PEDLIST" ]
then
  echo "ERROR: no pedigree file is read by the given scripts" >&2
  exit 1
fi

#
# Merge the pedigree files
#
# Build the master list of records, keyed on "<pedigree>_sort_<id>".
# With -m only the first pedigree file contributes IDs.
#
if [ $MASTER = 1 ]
then
  head -n 1 "$PEDLIST" > "$IDLIST"
else
  cp "$PEDLIST" "$IDLIST"
fi

: > "$PEDFILE"
for fil in `cat "$IDLIST"`
do
  # Skip blank lines and lines whose first field starts with # or !
  $AWK 'NF && $1 !~ /^[#!]/ {
    print $1 "_sort_" $2, $1, $2, $3, $4, $5
  }' "$fil" >> "$PEDFILE"
done
# One record per key: later duplicates of the same pedigree/ID are dropped.
sort -u -k1,1 "$PEDFILE" > "$TMPPED"
mv "$TMPPED" "$PEDFILE"

#
# Update pedigree structure for each record using other pedigree files
#
# The error log is emptied once here and only ever appended to below, so
# that messages from every pedigree file are kept.
#
: > "$ERRLOG"
for fil in `cat "$PEDLIST"`
do
  $AWK 'NF && $1 !~ /^[#!]/ {
    printf "%s%s%s %s %s %s\n", $1, "_sort_", $2, $3, $4, $5
  }' "$fil" > "$TMPPED"
  sort -u -k1,1 "$TMPPED" > "$PARTPED"
  # $NMATCH is deliberately left unquoted in the test: some wc
  # implementations pad the count with blanks.
  NMATCH=`join "$PEDFILE" "$PARTPED" | wc -l`
  if [ $NMATCH = 0 ]
  then
    echo "No IDs from \"$fil\" match master list!" >> "$ERRLOG"
  else
    # After the join, fields 1-6 are the master record (key, pedigree, id,
    # father, mother, sex) and fields 7-9 are father, mother and sex as
    # given in $fil.
    join -a1 "$PEDFILE" "$PARTPED" > "$TMPPED"
    pad "$TMPPED" | $AWK -v fil="$fil" -v errlog="$ERRLOG" '
    BEGIN {
      sex[1]="m"; sex["M"]="m"; sex["m"]="m"
      sex[2]="f"; sex["F"]="f"; sex["f"]="f"
    }
    {
      # Fill in values missing ("x" or ".") from the master record.
      if (($4=="x" || $4==".") && $7!="x" && $7!=".") $4=$7
      if (($5=="x" || $5==".") && $8!="x" && $8!=".") $5=$8
      # Only recode a sex value that is recognised; an unknown code would
      # otherwise blank the field.
      if (($6=="x" || $6==".") && $9!="x" && $9!="." && ($9 in sex)) $6=sex[$9]
      # Report values that are present in both records but disagree.
      if ($4!="x" && $4!="." && $7!="x" && $7!="." && $4!=$7) {
        print fil ": Fathers of " $2 "-" $3 \
              " (" $4 " and " $7 ") do not match master!" >> errlog
      }
      if ($5!="x" && $5!="." && $8!="x" && $8!="." && $5!=$8) {
        print fil ": Mothers of " $2 "-" $3 \
              " (" $5 " and " $8 ") do not match master!" >> errlog
      }
      if ($6!="x" && $6!="." && $9!="x" && $9!="." && sex[$6]!=sex[$9]) {
        print fil ": Sexes of " $2 "-" $3 \
              " (" $6 " and " $9 ") do not match master!" >> errlog
      }
      print $1,$2,$3,$4,$5,$6
    }' > "$PEDFILE"
  fi
done
cat "$ERRLOG"

#
# Add phenotypes to each record
#
# For every pedigree file, fields 6 onwards are appended to the matching
# master record.  "/" is a field separator here, so a value written a/b
# becomes two columns.
#
for fil in `cat "$PEDLIST"`
do
  $AWK '
  BEGIN { FS="[ /\t]+" }
  {
    gsub("^[ ]+","")
    printf "%s%s%s", $1, "_sort_", $2
    for(i=6;i<=NF;i++) {
      printf " %s",$i
    }
    printf "\n"
  }' "$fil" > "$TMPPED"
  sort -u -k1,1 "$TMPPED" > "$PARTPED"
  join -a1 "$PEDFILE" "$PARTPED" > "$TMPPED"
  pad "$TMPPED" > "$PEDFILE"
done

# Drop the sort key and pass the result through "bester".
$AWK '{ $1=""; print $0}' "$PEDFILE" | bester > "$TMPPED"
mv "$TMPPED" "$PEDFILE"

# An empty error log is not worth keeping.
if [ ! -s "$ERRLOG" ]
then
  rm "$ERRLOG"
fi