# for emacs: -*- mode: sh; -*-

#########################################################################
# starting the database
#
# Writes the makeGenomeDb.pl configuration for eboVir3, runs the build in
# two stages (stop after AGP verification, then continue from the db step),
# and links the unmasked 2bit file into /gbdb.

cd /hive/data/genomes/eboVir3

# NOTE(review): ncbiAssemblyId, ncbiAssemblyName and genBankAccessionID
# below say KM034561 while the sequence files and labels say KM034562.1 --
# confirm which accession is intended.  Values are left as recorded.
#
# The here-doc delimiter is quoted so nothing inside is expanded; bash
# strips the quotes, so the terminating line must be the bare word _EOF_.
cat << '_EOF_' > eboVir3.config.ra
# Config parameters for makeGenomeDb.pl:
db eboVir3
clade virus
genomeCladePriority 1998
scientificName Filoviridae ebolavirus
commonName Ebola virus 2014
assemblyDate Jun. 2014
assemblyLabel West Africa 01 June 2014 EBOV/G3683/KM034562.1
assemblyShortLabel G3683/KM034562.1
orderKey 10798
mitoAcc none
fastaFiles /hive/data/genomes/eboVir3/ucsc/KM034562v1.fa.gz
agpFiles /hive/data/genomes/eboVir3/ucsc/KM034562v1.agp
# qualFiles none
dbDbSpeciesDir ebola
photoCreditURL http://phil.cdc.gov/phil/details.asp?pid=1836
photoCreditName CDC/Cynthia Goldsmith Public Health Image Library (PHIL)
ncbiGenomeId 4887
ncbiAssemblyId KM034561
ncbiAssemblyName KM034561
ncbiBioProject 257197
genBankAccessionID KM034561
# http://www.ncbi.nlm.nih.gov/biosample/SAMN02951977
taxId 186538
_EOF_

# test sequence and AGP
makeGenomeDb.pl -stop=agp eboVir3.config.ra \
  -dbHost=hgwdev -fileServer=hgwdev -workhorse=hgwdev > agp.log 2>&1
# verify that looks good, check the agp.log and the resulting *.agp file

# continuing:
makeGenomeDb.pl -continue=db eboVir3.config.ra \
  -dbHost=hgwdev -fileServer=hgwdev -workhorse=hgwdev > db.log 2>&1

# add 2bit file to gbdb
# (the first link is relative, inside the build directory; the second is
# an absolute link from /gbdb back to this directory)
ln -s eboVir3.unmasked.2bit eboVir3.2bit
ln -s "$(pwd)/eboVir3.unmasked.2bit" /gbdb/eboVir3/eboVir3.2bit

#########################################################################
# repeat masking (DONE - 2015-04-18 - Hiram)
#
# RepeatMasker doesn't do virus
# trf/simpleRepeats finds nothing
# windowMasker finds a couple of bits

mkdir -p /hive/data/genomes/eboVir3/bed/windowMasker
cd /hive/data/genomes/eboVir3/bed/windowMasker
time (doWindowMasker.pl -buildDir="$(pwd)" \
  -workhorse=hgwdev eboVir3) > do.log 2>&1
# real    0m24.311s

# fails on final measurement with rmsk which does not exist,
# so the run is finished by continuing from the cleanup step:
time (doWindowMasker.pl -continue=cleanup -buildDir="$(pwd)" \
  -workhorse=hgwdev eboVir3) > cleanup.log 2>&1
# real    0m4.435s

#########################################################################
# cpgIslandsUnmasked (NOT DONE)
# only finds one bit which isn't relevant to anything

mkdir -p /hive/data/genomes/eboVir3/bed/cpgIslandsUnmasked
cd /hive/data/genomes/eboVir3/bed/cpgIslandsUnmasked
time (doCpgIslands.pl -buildDir="$(pwd)" -bigClusterHub=ku \
  -tableName=cpgIslandExtUnmasked -dbHost=hgwdev -smallClusterHub=ku \
  -workhorse=hgwdev \
  -maskedSeq=/hive/data/genomes/eboVir3/eboVir3.unmasked.2bit eboVir3) \
  > do.log 2>&1

#########################################################################
# BLATSERVERS ENTRY (DONE - 2014-09-16 - Hiram)
# After getting a blat server assigned by the Blat Server Gods,

# delete previous temporary
hgsql -e 'DELETE FROM blatServers where db="eboVir3";' hgcentraltest

# Two rows are inserted on host blat4d: port 17852 with isTrans=1/canPcr=0
# and port 17853 with isTrans=0/canPcr=1.
hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  VALUES ("eboVir3", "blat4d", "17852", "1", "0"); \
  INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  VALUES ("eboVir3", "blat4d", "17853", "0", "1");' \
  hgcentraltest
# test it with some sequence

############################################################################
# lastz Marburg sequences (DONE - 2014-09-18 - Hiram)

mkdir -p /hive/data/genomes/eboVir3/bed/lastzMarVir1.2014-09-18
cd /hive/data/genomes/eboVir3/bed/lastzMarVir1.2014-09-18

# The here-doc delimiter is quoted so nothing inside is expanded; bash
# strips the quotes, so the terminating line must be the bare word _EOF_.
cat << '_EOF_' > DEF
# Ebola virus single sample: KM034562.1
BLASTZ=/cluster/bin/penn/lastz-distrib-1.03.52/bin/lastz
BLASTZ_H=2000
BLASTZ_Y=3400
BLASTZ_L=4000
BLASTZ_K=2200
BLASTZ_Q=/cluster/data/blastz/HoxD55.q

# TARGET - eboVir3 KM034562.1
SEQ1_DIR=/hive/data/genomes/eboVir3/eboVir3.2bit
SEQ1_CHUNK=200000
SEQ1_LAP=0
SEQ1_LEN=/hive/data/genomes/eboVir3/chrom.sizes

# QUERY - marVir1 2 strains
SEQ2_DIR=/hive/data/genomes/marVir1/marVir1.2bit
SEQ2_CHUNK=200000
SEQ2_LAP=0
SEQ2_LEN=/hive/data/genomes/marVir1/chrom.sizes

BASE=/hive/data/genomes/eboVir3/bed/lastzMarVir1.2014-09-18
TMPDIR=/dev/shm
_EOF_

time (doBlastzChainNet.pl "$(pwd)/DEF" \
  -syntenicNet -stop=net -fileServer=hgwdev \
  -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
  -chainMinScore=100 -chainLinearGap=loose \
  -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > do.log 2>&1
# real    1m50.424s

# need to fixup the load step
time (doBlastzChainNet.pl "$(pwd)/DEF" \
  -debug -syntenicNet -continue=load -stop=load -fileServer=hgwdev \
  -tRepeats=windowmaskerSdust -qRepeats=windowmaskerSdust \
  -chainMinScore=100 -chainLinearGap=loose \
  -workhorse=hgwdev -smallClusterHub=ku -bigClusterHub=ku) > load.log 2>&1
# real    1m50.424s
# NOTE(review): this timing is identical to the run above -- possibly a
# copy/paste rather than a measured value; confirm.

featureBits -countGaps eboVir3 chainMarVir1Link
# 640008 bases of 889112 (71.983%) in intersection
# NOTE(review): the 889112-base total does not match the 18957-base
# sequence reported by genscan below -- confirm this recorded output.

############################################################################
# Genscan (DONE - 2014-09-18 - Hiram)

mkdir -p /hive/data/genomes/eboVir3/bed/genscan
cd /hive/data/genomes/eboVir3/bed/genscan
time (doGenscan.pl -buildDir="$(pwd)" -bigClusterHub=ku -workhorse=hgwdev \
  -dbHost=hgwdev eboVir3) > do.log 2>&1

cat fb.eboVir3.genscan.txt
# 14316 bases of 18957 (75.518%) in intersection

############################################################################
# set default position on the GP gene area:
# NOTE(review): the coordinates below span the whole sequence (1-18957),
# not a sub-region -- confirm whether a narrower range was intended.
hgsql -e \
  'update dbDb set defaultPos="KM034562v1:1-18957" where name="eboVir3";' \
  hgcentraltest

############################################################################
# joinerCheck and download files (DONE - 2014-09-30 - Hiram)

# add entries to all.joiner to get this clean:
# (./all.joiner is a relative path; presumably run from the directory
# that holds all.joiner -- verify before running)
joinerCheck -database=eboVir3 -tableCoverage ./all.joiner
# should be no output for tableCoverage

joinerCheck -database=eboVir3 -keys ./all.joiner
# Recorded output (commented so it is not executed as commands):
# Checking keys on database eboVir3
#  eboVir3.chainMarVir1.id - hits 2 of 2 (100.000%) ok
#  eboVir3.chainMarVir1Link.chainId - hits 541 of 541 (100.000%) ok
#  eboVir3.chainMarVir1Link.chainId - hits 541 of 541 (100.000%) ok
#  eboVir3.chromInfo.chrom - hits 1 of 1 (100.000%) ok

cd /hive/data/genomes/eboVir3
makeDownloads.pl -allowMissedTrfs -ignoreRepeatMasker \
  -dbHost=hgwdev -workhorse=hgwdev eboVir3 > downloads.log 2>&1
# the jkStuff/doCompress.csh needed to have some business
# commented out to make it complete.  It was run manually, and
# the resulting README files were edited.

##############################################################################