# deCODE and 1000 Genomes recombination rates super track # Max, Thu Sep 22 10:23:39 PDT 2022 cd /hive/data/genomes/hg38/bed/recombRate mkdir orig zcat orig/aau1043_datas1.gz | grep -v '^#' | grep -v cMperMb | less | cut -f1-4 > recombPat.bedgraph zcat orig/aau1043_datas2.gz | grep -v '^#' | grep -v cMperMb | less | cut -f1-4 > recombMat.bedgraph zcat orig/aau1043_datas3.gz | grep -v '^#' | grep -v cMperMb | less | cut -f1-4 > recombAvg.bedgraph bedGraphToBigWig recombPat.bedgraph ../../chrom.sizes recombPat.bw bedGraphToBigWig recombAvg.bedgraph ../../chrom.sizes recombAvg.bw bedGraphToBigWig recombMat.bedgraph ../../chrom.sizes recombMat.bw # crossover events zless orig/aau1043_datas4.gz | grep -v '^#' | grep -v medsnp > events.bed bedSort events.bed events.bed bedToBigBed events.bed -as=${HOME}/kent/src/hg/makeDb/autoSql/recombEvents.as -tab ../../chrom.sizes events.bb -type=bed4+ # de novo mutations less orig/aau1043_datas5.tsv | grep -v \# | grep -v Chr | tawk '{print($1,$2,$2+1, $3">"$4, $5, $6, $7, $8)}' > denovo.bed bedSort denovo.bed denovo.bed bedToBigBed denovo.bed ../../chrom.sizes recombDenovo.bb -type=bed4+ -as=${HOME}/kent/src/hg/makeDb/autoSql/recombDenovo.as -tab # 1000 Genomes subtrack # the 1000 genomes map, received makeDocs from poruloh@broadinstitute.org # the following makedocs after ---- are NOT from UCSC, they are from Po-Ru: ----- I believe I downloaded the hg19 map from either the HapMap website or the IMPUTE2 website (maybe the same?): https://ftp.ncbi.nlm.nih.gov/hapmap/recombination/latest/rates/ https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.html ### liftOver + post-processing script: HG19_BED=genetic_map_hg19_withX.bed # prepare UCSC .bed file with hg19 map data zcat genetic_map_hg19_withX.txt.gz | awk -v OFS=$'\t' 'NR>1 {if ($1==23) $1="X"; print "chr"$1,$2-1,$2,"chr"$1","$2","$4}' > $HG19_BED # lift to hg38 HG38_BED_RAW=genetic_map_hg38_withX.bed HG38_BED_UNMAPPED=genetic_map_hg38_withX.bed.unmapped /home/pl88/liftOver/liftOver $HG19_BED /home/pl88/liftOver/hg19ToHg38.over.chain $HG38_BED_RAW $HG38_BED_UNMAPPED # filter sites that moved to different chromosomes; sort in hg38 order HG38_BED_CLEAN=genetic_map_hg38_withX.clean.bed sed 's/,/\t/g' $HG38_BED_RAW | awk '$1==$4' | sed 's/X/23/g' | sed 's/chr//g' | cut -f1,3- | sort -k1,1n -k2,2n > $HG38_BED_CLEAN # for each pair of consecutive hg38 map positions: # - if base pair span in hg38 is within 20% of base pair span in hg19, keep relative cM coordinates # - else, set recombination rate to default 1cM/Mb (with 1cM cap on cM distance between positions) awk ' BEGIN {prevChr=0; prevPos=0; prevPosHg19=0; prevcM19=0; cM38 = 0; print "chr position COMBINED_rate(cM/Mb) Genetic_Map(cM)"} { chr=$1; pos=$2; posHg19=$4; cM19=$5; if (chr==prevChr) { deltaPosRatio = (pos-prevPos) / (posHg19-prevPosHg19); if (0.8 < deltaPosRatio && deltaPosRatio < 1.2) { rate = (cM19-prevcM19) / (pos-prevPos) * 1e6; cM38 += cM19-prevcM19; } else if (-1.2 < deltaPosRatio && deltaPosRatio < -0.8) { rate = (prevcM19-cM19) / (pos-prevPos) * 1e6; cM38 += prevcM19-cM19; } else { cMdiff = (pos-prevPos)*1e-6; if (cMdiff > 1) { print "WARNING_LARGE_CMDIFF",prevChr,prevPos,prevPosHg19,posHg19-prevPosHg19,chr,pos,posHg19,pos-prevPos,(pos-prevPos)/(posHg19-prevPosHg19) > "/dev/stderr"; cMdiff = 1; } rate = cMdiff / (pos-prevPos); cM38 += cMdiff; print "BAD",prevChr,prevPos,prevPosHg19,posHg19-prevPosHg19,chr,pos,posHg19,pos-prevPos,(pos-prevPos)/(posHg19-prevPosHg19) > "/dev/stderr"; } } else { rate=0; cM38 = 0; } printf("%s %d %.10f %.15f\n",chr,pos,rate,cM38); prevChr=chr; prevPos=pos; prevPosHg19=posHg19; prevcM19=cM19; }' $HG38_BED_CLEAN 2> genetic_map_hg38_withX.BAD.txt | gzip > genetic_map_hg38_withX.txt.gz rm $HG19_BED $HG38_BED_RAW $HG38_BED_CLEAN $HG38_BED_UNMAPPED ---- Now the UCSC makedocs, which are very simple: cd orig wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/tables/genetic_map_hg38_withX.txt.gz cd .. less orig/genetic_map_hg38_withX.txt.gz | grep -v '^\chr' | sed -e 's/^/chr/' | sed -e 's/chr23/chrX/' | awk 'BEGIN {OFS="\t"; prevChr=0; prevPos=0; } {if ($1==prevChr) { print ($1, prevPos, $2, $3); } else { prevPos=0; print ($1, "0", $2, $3) } prevPos = $2; prevChr=$1;}' | less > recomb1000GAvg.bedgraph bedSort recomb1000GAvg.bedgraph recomb1000GAvg.bedgraph bedGraphToBigWig recomb1000GAvg.bedgraph ../../chrom.sizes recomb1000GAvg.bw