# braney 2020-03-10
# Build the BED files behind the mafSnp119way and mafSnpStrainName119way
# tables of wuhCor1, starting from the multiz119way alignment.
#
# NOTE(review): whitespace inside quoted patterns below (e.g. " 0$", 's/ /_/g')
# is reproduced exactly as found; if this log was ever whitespace-normalised,
# confirm none of those blanks were originally tabs.
mkdir -p /hive/data/genomes/wuhCor1/bed/snpView
cd /hive/data/genomes/wuhCor1/bed/snpView

# Sequence names: field 2 of every MAF line beginning with "s", truncated at
# the first ".".
awk '/^s/ {print $2}' ../multiz119way/defraged.multiz119way.maf \
  | sed 's/\..*//' > species.lst

# Two sed scripts, one rule per name: foreSedScript.txt rewrites each name to
# its "_" -> "-" form, backSedScript.txt rewrites it back.  The dashed names
# are what mafGene output and paSNP see; the originals are restored in the
# final BED.  (Why paSNP needs dashes is not visible here -- TODO confirm.)
while IFS= read -r name; do
  dashed=${name//_/-}
  printf 's/%s/%s/g\n' "$dashed" "$name"
done < species.lst > backSedScript.txt
while IFS= read -r name; do
  dashed=${name//_/-}
  printf 's/%s/%s/g\n' "$name" "$dashed"
done < species.lst > foreSedScript.txt
sed -f foreSedScript.txt species.lst > editSpecies.lst

# paSNP output is reshaped by replacing the first ":" and the first "-" on
# each line with a blank, then emitting 12 columns per line.  The layout
# appears to be BED12 with start = $2-1 (presumably 1-based -> 0-based) and a
# single block covering the item -- TODO confirm against paSNP's output format.
# 1583 / 1819 are the fixed score values for the two classes; they look like
# Sequence Ontology term numbers -- TODO confirm.
mafGene -exons wuhCor1 multiz119way singleCover species.lst stdout \
  | sed -f foreSedScript.txt > nonsyn.faa
paSNP editSpecies.lst nonsyn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{print $1, $2-1, $3, $4, 1583, "+", $2-1, $3, "255,0,0", 1, $3-($2 - 1), 0}' \
  | sed -f backSedScript.txt > nonsyn.bed

mafGene -uniqAA -exons wuhCor1 multiz119way singleCover species.lst stdout \
  | sed -f foreSedScript.txt > syn.faa
paSNP editSpecies.lst syn.faa stdout \
  | sed -e 's/:/ /' -e 's/-/ /' \
  | awk '{print $1, $2-1, $3, $4, 1819, "+", $2-1, $3, "0,255,0", 1, $3 - ($2 - 1), 0}' \
  | sed -f backSedScript.txt > syn.bed

# Per-base classification of the alignment; the first "wuhCor1" followed by
# any one character is removed from each line.
mafToSnpBed wuhCor1 ../multiz119way/defraged.multiz119way.maf \
    ../multiz119way/mafFrames/singleCover.gp stdout \
  | sed 's/wuhCor1.//' > single.bed

# snpClassToBed PATTERN RGB
#   Select the single.bed lines matching PATTERN (a grep regex, here always
#   anchored at end of line) and print them as 12 columns with colour RGB,
#   keeping the first five input columns as-is.
snpClassToBed() {
  grep "$1" single.bed \
    | awk -v rgb="$2" '{print $1, $2, $3, $4, $5, "+", $2, $3, rgb, 1, $3 -$2, 0}'
}

#these should all disappear on the merge
# Split single.bed by the class code at the end of each line.  The patterns
# for the numeric codes are not preceded by a separator, so they match any
# line whose last four characters are those digits.
snpClassToBed "1580$" "255,255,0" > codingVariant.bed
{
  # Both codes land in one file, all 1623 lines first, then all 1624 lines.
  snpClassToBed "1623$" "255,255,0"
  snpClassToBed "1624$" "255,255,0"
} > utrVariant.bed
snpClassToBed " 0$" "240,240,180" > missing.bed
snpClassToBed "1628$" "0,0,0" > intergenic.bed
snpClassToBed "1627$" "0,0,0" > intron.bed

# chromInfo rows without the header line that hgsql prints first.
echo "select * from chromInfo" | hgsql wuhCor1 | tail -n +2 > chrom.sizes

# Merge every class per sequence name: collect that name's lines (whole-word
# match, no file-name prefixes) from all seven BED files and append the
# bedSmash result.  -f so that a first run, with no output.bed yet, is quiet.
rm -f output.bed
while IFS= read -r name; do
  printf '%s\n' "$name"
  grep -wh "$name" nonsyn.bed syn.bed codingVariant.bed utrVariant.bed \
      intron.bed intergenic.bed missing.bed \
    | bedSmash stdin chrom.sizes stdout >> output.bed
done < species.lst

# make codingVariants into missing data instead of showing blue
# (keep the first five columns; a trailing " 1580" becomes " 0")
awk '{print $1,$2,$3,$4,$5}' output.bed | sed 's/ 1580$/ 0/' > load.bed

# Accession -> strain name rules from columns 1 and 3 of nameList119.txt,
# with blanks in the names turned into "_".
# NOTE(review): this first rule set (plain accession, reference excluded) is
# overwritten by the very next command, which redirects to the same file with
# ">".  It is kept as recorded; confirm it was a superseded first attempt and
# not meant to be appended.
cut -f1,3 ~/kent/src/hg/makeDb/doc/wuhCor1/nameList119.txt \
  | sed -e 's/ /_/g;' | grep -v NC_045512v2 \
  | awk -F$'\t' '{printf "s#%s#%s#g;\n", $1, $2}' > accToName.sed

# Effective rule set: "<acc><any char><acc>" -> strain name.
cut -f1,3 ~/kent/src/hg/makeDb/doc/wuhCor1/nameList119.txt \
  | sed -e 's/ /_/g;' \
  | awk -F$'\t' '{printf "s#%s.%s#%s#g;\n", $1, $1, $2}' > accToName.sed

sed -f accToName.sed load.bed > strainLoad.bed

# verify enough correct names, should be 118:
awk '{print $4}' strainLoad.bed | sort -u | wc -l
# 118
# and one chromosome name:
awk '{print $1}' strainLoad.bed | sort | uniq -c
# 205615 NC_045512v2

hgLoadBed wuhCor1 mafSnp119way load.bed
hgLoadBed wuhCor1 mafSnpStrainName119way strainLoad.bed