# miRNA/snoRNA tracks for hg38 / GRCh38
##############################################################################
# Update from v13 to v22 miRBase data in the wgRna table for hg38.
# The snoRNA data hasn't been updated in forever, so those rows are carried
# over unchanged from the existing table.
#
# Overview of the steps below:
#   1. download the miRBase GFF3 for human
#   2. keep only primary transcripts and convert them to BED
#   3. append the non-miRNA rows of the existing wgRna table
#   4. load the result as wgRnaNew, sanity-check it against wgRna
#   5. swap wgRnaNew into place, keeping the old table as wgRnaOld

# -p so that re-running this section does not fail on an existing directory
mkdir -p /hive/data/genomes/hg38/bed/wgRna
cd /hive/data/genomes/hg38/bed/wgRna

# download data from miRBase, CURRENT pointed to v22 when this was run:
wget ftp://mirbase.org/pub/mirbase/CURRENT/genomes/hsa.gff3

# Only select the primary transcripts, make the start coordinate 0-based so
# the items match the gencode and refseq tracks, and remove the ID and Alias
# entries in column 9 (only the Name value is kept):
grep -c "miRNA_primary_transcript" hsa.gff3
# 1919
# After splitting column 9 on ';' and '=', the Name value lands in field 14.
tawk '{if ($3 == "miRNA_primary_transcript") {$4-=1; print;}}' hsa.gff3 \
  | grep -v '^#' | tr ';' '\t' | tr '=' '\t' | cut -f1-8,14 \
  > hsa.primaryCleaned.gff3
wc -l hsa.primaryCleaned.gff3
# 1918
# NOTE(review): grep -c reports one more line than the filtered file;
# presumably a '#' header line in hsa.gff3 mentions the term -- confirm.

# now get into bed format, with type="miRNA"
# (columns: chrom, start, end, name, score=0, strand, 0, 0, type)
tawk '{print $1, $4, $5, $9, 0, $7, 0, 0, "miRNA";}' hsa.primaryCleaned.gff3 \
  > miRNA.bed

# get snoRNA entries from current wgRna table
# (cut -f2- drops the first column of the table dump):
hgsql -Ne "select * from wgRna where type != 'miRNA'" hg38 \
  | cut -f2- > wgRna.other.bed

# combine and load:
cat miRNA.bed wgRna.other.bed > wgRna052518.bed
hgLoadBed -tab -renameSqlTable -verbose=4 \
  -sqlTable="$HOME/kent/src/hg/lib/wgRna.sql" \
  -as="$HOME/kent/src/hg/lib/wgRna.as" hg38 wgRnaNew wgRna052518.bed
# ### kent source version 365 ###
# Reading wgRna052518.bed
# Read 2320 elements of size 9 from wgRna052518.bed
# Sorted
# Creating table definition for wgRnaNew from sql: /cluster/home/chmalee/kent/src/hg/lib/wgRna.sql
# Saving bed.tab
# Loading hg38

# compare old and new tables:
hgsql -Ne "select type, count(*) from wgRna group by type" hg38
# CDBox 269
# HAcaBox 112
# miRNA 938
# scaRna 21
hgsql -Ne "select type, count(*) from wgRnaNew group by type" hg38
# CDBox 269
# HAcaBox 112
# miRNA 1918
# scaRna 21

# backup old table and compare to new one before rename.
# Should be only miRNA update:
hgsql -Ne "select * from wgRna" hg38 | cut -f2- > wgRna.backup
# comm -23 keeps the lines found only in the new file; column 9 of those
# lines is the type, so the unique values list every type that changed.
comm -23 <(sort -k1 -k2n wgRna052518.bed) <(sort -k1 -k2n wgRna.backup) \
  | cut -f9 | sort -u
# miRNA

# rename wgRnaNew table (wgRna becomes wgRnaOld, wgRnaNew becomes wgRna):
hgsqlSwapTables -dropTable3 hg38 wgRnaNew wgRna wgRnaOld