############################################################################## ### chain the chains obtained from the HPRC project ### done - 2023-08-14 - Hiram mkdir /hive/data/genomes/hg38/bed/hprc/chain cd /hive/data/genomes/hg38/bed/hprc/chain # avoid the broken one GCA_018504655.1 ls ../bigChainToChain/hg38.chainHprc*.chain | grep -v GCA_018504655.1 \ > chain.list # using this 'runOne' script for each one: ####################################################################### #!/bin/bash set -beEu -o pipefail export hg38TwoBit="/hive/data/genomes/hg38/hg38.2bit" export chainFile="${1}" export inChain="../bigChainToChain/${chainFile}" export asmName=`echo $chainFile | sed -e 's/hg38.chainHprc//; s/.chain//;'` # may already be done if [ -s "${chainFile}" ]; then exit 0 fi export buildDir="" export twoBit="" export asmId="" case "${asmName}" in GCA_*) gcX="${asmName:0:3}" d0="${asmName:4:3}" d1="${asmName:7:3}" d2="${asmName:10:3}" tB="`ls -d /hive/data/genomes/asmHubs/allBuild/${gcX}/${d0}/${d1}/${d2}/${asmName}_*`" buildDir="`realpath ${tB}`" asmId="`basename $buildDir`" twoBit="${buildDir}/${asmId}.2bit" ;; T2T-*) asmId="hs1" buildDir="/hive/data/genomes/$asmId" twoBit="${buildDir}/${asmId}.2bit" ;; *) printf "ERROR: unrecognized $asmName\n" 1>&2 exit 255 ;; esac printf "%s\n" "${chainFile}" 1>&2 time (chainToAxt "${inChain}" "${hg38TwoBit}" \ "${twoBit}" stdout | axtChain -minScore=0 -linearGap=loose \ stdin "${hg38TwoBit}" "${twoBit}" stdout | chainDupFilter stdin /scratch/tmp/${asmName}.noDup.chain /scratch/tmp/${asmName}.dup.chain ) > err/${chainFile} 2>&1 time (chainToAxt /scratch/tmp/${asmName}.dup.chain "${hg38TwoBit}" \ "${twoBit}" stdout | axtChain -minScore=0 -linearGap=loose \ stdin "${hg38TwoBit}" "${twoBit}" stdout | cat - /scratch/tmp/${asmName}.noDup.chain | chainSort stdin stdout | awk '/chain/ {$13=NR} { print}' > "${chainFile}" ) > err/${chainFile} 2>&1 rm -f /scratch/tmp/${asmName}.dup.chain /scratch/tmp/${asmName}.noDup.chain ############################################################## to generate a jobList printf "#LOOP ./runOne $(path1) #ENDLOOP " > template gensub2 chain.list single template jobList # running them all, one at a time mkdir featBits counts err time (~/kent/src/hg/utils/automation/perlPara.pl 1 jobList) > do.log 2>&1 # real 236m9.479s ############################################################################## # turn those chains into nets (DONE - 2023-08-15 - Hiram) mkdir /hive/data/genomes/hg38/bed/hprc/net cd /hive/data/genomes/hg38/bed/hprc/net ls ../chain | grep hg38.chainHprc > chain.list printf '#LOOP runOne $(path1) {check out exists+ $(root1).net} #ENDLOOP ' > template gensub2 chain.list single template jobList para -ram=4g create jobList para push para time Completed: 88 of 88 jobs CPU time in finished jobs: 285s 4.74m 0.08h 0.00d 0.000 y IO & Wait Time: 206s 3.44m 0.06h 0.00d 0.000 y Average job time: 6s 0.09m 0.00h 0.00d Longest finished job: 8s 0.13m 0.00h 0.00d Submission to last job: 8s 0.13m 0.00h 0.00d ########################### Loading these nets: #!/bin/bash set -beEu -o pipefail ls hg38.chainHprc*.net | while read netFile do export asmName=`echo $netFile | sed -e 's/hg38.chainHprc//; s/.net//;'` export buildDir="" export twoBit="" export asmId="" case "${asmName}" in GCA_*) asmId=`echo $asmName | sed -e 's/\./v/;'` ;; T2T-*) asmId="Hs1" ;; *) printf "ERROR: unrecognized $asmName\n" 1>&2 exit 255 ;; esac printf "hgLoadNet -warn hg38 netHprc${asmId} ${netFile}\n" 1>&2 hgLoadNet -warn hg38 netHprc${asmId} ${netFile} done