# # Write a list of genotype changes to convert ped1 into ped2 # AWK=/bin/gawk if [ $# -lt 2 ] || ! [ -f $1 ] || ! [ -f $2 ] || [ "$1" = "--help" ] || [ "$1" = "-h" ] then echo "usage: diffped [] " echo " Generate list of differences in membership of the two" echo " pedigree files. Alternatively (if script specified)" echo " Generate list of differences in pheno/genotypes for same" echo " individual in first and second pedigree files." echo " Each pedigree is assumed to contain the same loci." exit fi if [ $# = 2 ] then mkdir tmpdir$$ $AWK '$1 !~ /^[#!]/ {print $1 "--" $2}' $1 | sort -k1,1 > tmpdir$$/ids1 $AWK '$1 !~ /^[#!]/ {print $1 "--" $2}' $2 | sort -k1,1 > tmpdir$$/ids2 for fil in $* do $AWK '$1 !~ /^[#!]/ {print}' $fil > tmpdir$$/$fil done cd tmpdir$$ wc -l $* | $AWK 'BEGIN{ getline; l1=$1; ped1=$2 getline; l2=$1; ped2=$2 print ped1 " (N=" l1 ") v. " ped2 " (N=" l2 ")" }' comm -3 ids1 ids2 cd .. rm -rf tmpdir$$ else $AWK -v ped1=$2 -v ped2=$3 ' BEGIN { loc=0 pos=5 run=0 } $1=="run" { run=1 } !run && $1=="set" && substr($2,1,3)=="loc" { loc++ name[loc]=$3 locpos[loc]=(pos+1) loctyp[loc]=substr($4,1,1) if (substr($4,1,3)=="mar" || substr($4,1,3)=="xma") { pos+=2 }else{ pos++ } } END { while ((getline < ped1)==1) { if (substr($1,1,1) !~ /[#!]/) { if (NF!=pos) { print "ERROR: Incorrect number of fields in " ped1 " (" NF ")" } rec1[$1 " " $2]=$0 }} while ((getline < ped2)==1) { if (substr($1,1,1) !~ /[#!]/) { if (NF!=pos) { print "ERROR: Incorrect number of fields in " ped2 " (" NF ")" } rec2[$1 " " $2]=$0 }} for (i in rec1) { if (i in rec2) { if (rec1[i]!=rec2[i]) { nr1=split(rec1[i],data1) nr2=split(rec2[i],data2) for (j=1;j<=loc;j++) { if (loctyp[j] ~ "[mx]" && \ (data1[locpos[j]]!=data2[locpos[j]] || \ data1[locpos[j]+1]!=data2[locpos[j]+1])) { print "edit",i,name[j], \ "from",data1[locpos[j]],data1[locpos[j]+1], \ "to",data2[locpos[j]],data2[locpos[j]+1] }else if (loctyp[j] ~ "[aq]" && \ data1[locpos[j]]!=data2[locpos[j]] ) { print "edit",i,name[j], "from",data1[locpos[j]], \ "to",data2[locpos[j]] } } } }else{ print "NOTE: " ped2 " does not contain a record for individual " i } } }' $1 fi