# for emacs: -*- mode: sh; -*- # This file describes browser build for the nasLar1 # Proboscis monkey/Nasalis larvatus # Assembly Statistics Report # Assembly Name: Charlie1.0 # Organism name: Nasalis larvatus # Taxid: 43780 # Submitter: Proboscis Monkey Functional Genome Consortium # Date: 2014-11-10 # BioSample: SAMN02689222 # Assembly type: haploid # Release type: major # Assembly level: Chromosome # Genome representation: full # GenBank Assembly ID: GCA_000772465.1 (species-representative latest) # ## Assembly-Units: ## GenBank Unit ID RefSeq Unit ID Assembly-Unit name ## GCA_000772475.1 Primary Assembly # chrMT: NC_008216.1 bioproject PRJNA17205 ############################################################################# # fetch sequence from new style download directory (DONE - 2014-11-26 - Hiram) # NCBI has redesigned their FTP download site, new type of address # and naming scheme mkdir -p /hive/data/genomes/nasLar1/genbank cd /hive/data/genomes/nasLar1/genbank rsync -L -a -P \ rsync://ftp.ncbi.nlm.nih.gov/genomes/genbank/vertebrate_mammalian/Nasalis_larvatus/all_assembly_versions/GCA_000772465.1_Charlie1.0/ ./ # measure what we have here: faSize GCA_000772465.1_Charlie1.0_genomic.fna.gz # 3011966170 bases (614872980 N's 2397093190 real 1644030343 upper # 753062847 lower) in 319549 sequences in 1 files # Total size: mean 9425.7 sd 1088158.8 min 201 (JMHX01238739.1) # max 215897965 (CM002998.1) median 823 # %25.00 masked total, %31.42 masked real faSize \ GCA_000772465.1_Charlie1.0_assembly_structure/Primary_Assembly/assembled_chromosomes/FASTA/*.fna.gz \ GCA_000772465.1_Charlie1.0_assembly_structure/Primary_Assembly/unplaced_scaffolds/FASTA/unplaced.scaf.fna.gz # 3011966170 bases (614872980 N's 2397093190 real 2397093190 upper # 0 lower) in 319549 sequences in 22 files # Total size: mean 9425.7 sd 1088158.8 min 201 (JMHX01238739.1) # max 215897965 (CM002998.1) median 823 # note that these totals do not include chrM since the GenBank ftp directory # did not 
include a non-nuclear directory # typical chrom names in component_localID2acc #Component ID Accession.version Nasalis_CHR1 JMHX01319529.1 Nasalis_CHR10 JMHX01319539.1 Nasalis_CHR11 JMHX01319540.1 ... Nlarvatus_CHRUc1 JMHX01143513.1 Nlarvatus_CHRUc10 JMHX01143514.1 Nlarvatus_CHRUc100 JMHX01143515.1 Nlarvatus_CHRUc1000 JMHX01143516.1 ... Nlarvatus_CHRUc99996 JMHX01319525.1 Nlarvatus_CHRUc99997 JMHX01319526.1 Nlarvatus_CHRUc99998 JMHX01319527.1 Nlarvatus_CHRUc99999 JMHX01319528.1 ############################################################################# # fixup to UCSC naming scheme (DONE - 2014-11-25 - Hiram) mkdir /hive/data/genomes/nasLar1/ucsc cd /hive/data/genomes/nasLar1/ucsc # script fixed up to alter the given NCBI chrom names into more useful # UCSC names time ./ucscCompositeAgp.pl ../genbank/GCA_000772465.1_Charlie1.0_assembly_structure/Primary_Assembly # CM003001.1 12 # CM003017.1 9 # CM003008.1 19 # CM003002.1 13 # CM003014.1 6 # CM003012.1 4 # CM003010.1 20 # CM003007.1 18 # CM003016.1 8 # CM003006.1 17 # CM002998.1 1 # CM002999.1 10 # CM003000.1 11 # CM003015.1 7 # CM003013.1 5 # CM003004.1 15 # CM003011.1 3 # CM003018.1 X # CM003009.1 2 # CM003005.1 16 # CM003003.1 14 # real 0m47.851s time ./unplaced.pl ../genbank/GCA_000772465.1_Charlie1.0_assembly_structure/Primary_Assembly # real 0m6.736s # verify nothing lost compared to genbank: faSize *.fa # 3011966170 bases (614872980 N's 2397093190 real 2397093190 upper # 0 lower) in 319549 sequences in 22 files # Total size: mean 9425.7 sd 1088158.8 min 201 (chrUn_JMHX01238739v1) # max 215897965 (chr1) median 823 # %0.00 masked total, %0.00 masked real # same numbers as above. 
# the supplied AGP files are useless, there are no gaps declared # construct fake gaps: cat *.fa | hgFakeAgp stdin ucsc.agp ############################################################################# # Initial database build (DONE - 2014-11-20 - Hiram) cd /hive/data/genomes/nasLar1 cat << '_EOF_' > nasLar1.config.ra # Config parameters for makeGenomeDb.pl: db nasLar1 clade primate genomeCladePriority 15 scientificName Nasalis larvatus commonName Proboscis monkey assemblyDate Nov. 2014 assemblyLabel Proboscis Monkey Functional Genome Consortium assemblyShortLabel Charlie1.0 orderKey 392 # chrM bioproject: 17205 mitoAcc NC_008216.1 fastaFiles /hive/data/genomes/nasLar1/ucsc/*.fa agpFiles /hive/data/genomes/nasLar1/ucsc/ucsc.agp # qualFiles none dbDbSpeciesDir proboscis photoCreditURL https://www.flickr.com/people/50852241@N00 photoCreditName David Dennis - Wikimedia Commons ncbiGenomeId 7994 ncbiAssemblyId 221311 ncbiAssemblyName Charlie1.0 ncbiBioProject 241312 genBankAccessionID GCA_000772465.1 taxId 43780 '_EOF_' # << happy emacs # verify sequence and AGP are OK: time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \ -stop=agp nasLar1.config.ra) > agp.log 2>&1 # real 4m56.562s # verify end of agp.log indicates: # *** All done! 
(through the 'agp' step) # then finish it off: time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \ -fileServer=hgwdev -continue=db nasLar1.config.ra) > db.log 2>&1 # real 24m21.493s # check in the trackDb files created and add to trackDb/makefile ########################################################################## # running repeat masker (DONE - 2014-12-12 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/repeatMasker cd /hive/data/genomes/nasLar1/bed/repeatMasker time (doRepeatMasker.pl -buildDir=`pwd` \ -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \ -smallClusterHub=ku nasLar1) > do.log 2>&1 # real 1298m51.515s cat faSize.rmsk.txt # 3011982740 bases (614872980 N's 2397109760 real 1287460374 upper 1109649386 lower) in 319550 sequences in 1 files # Total size: mean 9425.7 sd 1088157.1 min 201 (chrUn_JMHX01238739v1) max 215897965 (chr1) median 823 # %36.84 masked total, %46.29 masked real egrep -i "versi|relea" do.log # RepeatMasker version open-4.0.5 # January 31 2014 (open-4-0-5) version of RepeatMasker # CC RELEASE 20140131; time featureBits -countGaps nasLar1 rmsk # 1113442810 bases of 3011982740 (36.967%) in intersection # real 2m26.058s # why is it different than the faSize above ? 
# because rmsk masks out some N's as well as bases, the count above # separates out the N's from the bases, it doesn't show lower case N's ########################################################################## # running simple repeat (DONE 2014-12-11 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/simpleRepeat cd /hive/data/genomes/nasLar1/bed/simpleRepeat time (doSimpleRepeat.pl -buildDir=`pwd` -bigClusterHub=ku \ -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \ nasLar1) > do.log 2>&1 # real 171m2.051s cat fb.simpleRepeat # 41400812 bases of 2398172200 (1.726%) in intersection # add to rmsk after it is done: cd /hive/data/genomes/nasLar1 twoBitMask nasLar1.rmsk.2bit \ -add bed/simpleRepeat/trfMask.bed nasLar1.2bit # you can safely ignore the warning about fields >= 13 twoBitToFa nasLar1.2bit stdout | faSize stdin > faSize.nasLar1.2bit.txt cat faSize.nasLar1.2bit.txt # 3011982740 bases (614872980 N's 2397109760 real 1286534225 upper # 1110575535 lower) in 319550 sequences in 1 files # Total size: mean 9425.7 sd 1088157.1 min 201 (chrUn_JMHX01238739v1) # max 215897965 (chr1) median 823 # %36.87 masked total, %46.33 masked real rm /gbdb/nasLar1/nasLar1.2bit ln -s `pwd`/nasLar1.2bit /gbdb/nasLar1/nasLar1.2bit ########################################################################## # CREATE MICROSAT TRACK (DONE - 2015-06-22 - Hiram) ssh hgwdev mkdir /cluster/data/nasLar1/bed/microsat cd /cluster/data/nasLar1/bed/microsat awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \ ../simpleRepeat/simpleRepeat.bed > microsat.bed hgLoadBed nasLar1 microsat microsat.bed # Read 22754 elements of size 4 from microsat.bed ########################################################################## ## WINDOWMASKER (DONE - 2014-12-11 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/windowMasker cd /hive/data/genomes/nasLar1/bed/windowMasker time (doWindowMasker.pl -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev 
nasLar1) > do.log 2>&1 # real 1019m50.392s # Masking statistics cat faSize.nasLar1.cleanWMSdust.txt # 3011982740 bases (614872980 N's 2397109760 real 1630689943 upper # 766419817 lower) in 319550 sequences in 1 files # Total size: mean 9425.7 sd 1088157.1 min 201 (chrUn_JMHX01238739v1) # max 215897965 (chr1) median 823 # %25.45 masked total, %31.97 masked real time featureBits -countGaps nasLar1 rmsk windowmaskerSdust \ > fb.nasLar1.rmsk.windowmaskerSdust.txt 2>&1 # real 5m55.718s cat fb.nasLar1.rmsk.windowmaskerSdust.txt # 553352708 bases of 3011982740 (18.372%) in intersection doWindowMasker.pl -continue=cleanup -buildDir=`pwd` -workhorse=hgwdev \ -dbHost=hgwdev nasLar1 # Elapsed time: 1m53s ########################################################################## # cpgIslands - (DONE - 2014-12-13 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/cpgIslands cd /hive/data/genomes/nasLar1/bed/cpgIslands time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \ -workhorse=hgwdev -smallClusterHub=ku nasLar1) > do.log 2>&1 # real 436m47.169s cat fb.nasLar1.cpgIslandExt.txt # 11894127 bases of 2398172200 (0.496%) in intersection ############################################################################## # cpgIslands on UNMASKED sequence (DONE - 2014-12-13 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/cpgIslandsUnmasked cd /hive/data/genomes/nasLar1/bed/cpgIslandsUnmasked time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=`pwd` \ -tableName=cpgIslandExtUnmasked \ -maskedSeq=/hive/data/genomes/nasLar1/nasLar1.unmasked.2bit \ -workhorse=hgwdev -smallClusterHub=ku nasLar1) > do.log 2>&1 # real 436m29.843s cat fb.nasLar1.cpgIslandExtUnmasked.txt # 14182180 bases of 2398172200 (0.591%) in intersection ############################################################################# # cytoBandIdeo - (DONE - 2014-12-13 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/cytoBand cd /hive/data/genomes/nasLar1/bed/cytoBand makeCytoBandIdeo.csh nasLar1 
######################################################################### # genscan - (DONE - 2014-12-13 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/genscan cd /hive/data/genomes/nasLar1/bed/genscan time (doGenscan.pl -buildDir=`pwd` -workhorse=hgwdev -dbHost=hgwdev \ -bigClusterHub=ku nasLar1) > do.log 2>&1 # real 65m53.319s # broken on eight chroms, finish off with split procedure mkdir splitRun cd splitRun rm -f file.list for N in 13 19 8 14 20 3 1 X do twoBitToFa ../../../nasLar1.2bit:chr${N} stdout \ | maskOutFa stdin hard chr${N}.fa mkdir chr${N} faSplit -lift=chr${N}.lift gap chr${N}.fa 2000000 chr${N}/chr${N}_ find ./chr${N} -type f | sed -e 's#^./##;' >> file.list done cat << '_EOF_' > runGsBig.csh #!/bin/csh -ef set chrom = $1 set dir = $2 set resultGtf = $3 set resultPep = $4 set resultSubopt = $5 mkdir -p gtf/$dir pep/$dir subopt/$dir set seqFile = $dir/$chrom.fa /cluster/bin/x86_64/gsBig $seqFile $resultGtf -trans=$resultPep -subopt=$resultSubopt -exe=/scratch/data/genscan/genscan -par=/scratch/data/genscan/HumanIso.smat -tmp=/tmp -window=2400000 '_EOF_' # << happy emacs cat << '_EOF_' > template #LOOP ./runGsBig.csh $(root1) $(lastDir1) {check out exists gtf/$(lastDir1)/$(root1).gtf} {check out exists pep/$(lastDir1)/$(root1).pep} {check out exists subopt/$(lastDir1)/$(root1).bed} #ENDLOOP '_EOF_' # << happy emacs gensub2 file.list single template jobList # Completed: 521 of 521 jobs # CPU time in finished jobs: 32672s 544.54m 9.08h 0.38d 0.001 y # IO & Wait Time: 2417s 40.28m 0.67h 0.03d 0.000 y # Average job time: 67s 1.12m 0.02h 0.00d # Longest finished job: 1834s 30.57m 0.51h 0.02d # Submission to last job: 2027s 33.78m 0.56h 0.02d # combine results export TOP="/hive/data/genomes/nasLar1/bed/genscan/splitRun" for C in chr1 chr13 chr14 chr19 chr20 chr3 chr8 chrX do cd ${TOP}/${C} cat ../gtf/${C}/*.gtf | liftUp -type=.gtf stdout ../${C}.lift error stdin \ | sed -e "s/${C}_0\([0-4]\)\./${C}.\1/g" > ${C}.gtf cat ../subopt/${C}/*.bed | liftUp 
-type=.bed stdout ../${C}.lift error stdin \ | sed -e "s/${C}_0\([0-4]\)\./${C}.\1/g" > ${C}.subopt.bed cat ../pep/${C}/*.pep | sed -e "s/${C}_0\([0-4]\)\./${C}.\1/g" > ${C}.pep ls -ogrt ${C}.gtf ${C}.subopt.bed ${C}.pep cd ${TOP} done # copy over these split results for C in chr1 chr13 chr14 chr19 chr20 chr3 chr8 chrX do ls -ld ../gtf/*/${C}.gtf ls -ld ../pep/*/${C}.pep ls -ld ../subopt/*/${C}.bed ls -og ${C}/${C}.gtf ${C}/${C}.pep ${C}/${C}.subopt.bed cp -p ${C}/${C}.gtf ../gtf/*/${C}.gtf cp -p ${C}/${C}.pep ../pep/*/${C}.pep cp -p ${C}/${C}.subopt.bed ../subopt/*/${C}.bed ls -ld ../gtf/*/${C}.gtf ls -ld ../pep/*/${C}.pep ls -ld ../subopt/*/${C}.bed done # after finishing off laggards with the splitRun procedure, continuing: time (doGenscan.pl -continue=makeBed -workhorse=hgwdev -dbHost=hgwdev \ nasLar1 ) > makeBed.log 2>&1 # real 12m32.631s cat fb.nasLar1.genscan.txt # 42866785 bases of 2398172200 (1.787%) in intersection cat fb.nasLar1.genscanSubopt.txt # 46110660 bases of 2398172200 (1.923%) in intersection ######################################################################## # Create kluster run files (DONE - 2015-01-06 - Hiram) cd /hive/data/genomes/nasLar1 # numerator is nasLar1 gapless bases "real" as reported by: head -1 faSize.nasLar1.2bit.txt # 3011982740 bases (614872980 N's 2397109760 real 1286534225 upper 1110575535 lower) in 319550 sequences in 1 files # denominator is hg19 gapless bases as reported by: # featureBits -noRandom -noHap hg19 gap # 234344806 bases of 2861349177 (8.190%) in intersection # 1024 is threshold used for human -repMatch: calc \( 2397109760 / 2861349177 \) \* 1024 # ( 2397109760 / 2861349177 ) * 1024 = 857.861184 # ==> use -repMatch=800 according to size scaled down from 1024 for human. 
# and rounded down to nearest 100 (857.86 -> 800; rounding down to the nearest 50 would give 850) cd /hive/data/genomes/nasLar1 blat nasLar1.2bit \ /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/nasLar1.11.ooc \ -repMatch=800 # Wrote 28620 overused 11-mers to jkStuff/nasLar1.11.ooc # check non-bridged gaps to see what the typical size is: hgsql -N -e 'select * from gap where bridge="no" order by size;' nasLar1 | ave -tableOut -col=7 stdin # # min Q1 median Q3 max mean N sum stddev # 50076 58368.8 70128 100495 1.07816e+07 178173 670 1.19376e+08 672006 # note the minimum non-bridged gap size is 50,076 gapToLift -verbose=2 -minGap=50000 nasLar1 jkStuff/nasLar1.nonBridged.lft \ -bedFile=jkStuff/nasLar1.nonBridged.bed ######################################################################## # GENBANK AUTO UPDATE (DONE - 2015-01-06 - Hiram) ssh hgwdev cd $HOME/kent/src/hg/makeDb/genbank git pull # /cluster/data/genbank/data/organism.lst shows: # #organism mrnaCnt estCnt refSeqCnt # Nasalis larvatus 4 0 0 # edit etc/genbank.conf to add nasLar1 just after bosTau7 # nasLar1 (Proboscis monkey) nasLar1.serverGenome = /hive/data/genomes/nasLar1/nasLar1.2bit nasLar1.clusterGenome = /hive/data/genomes/nasLar1/nasLar1.2bit nasLar1.ooc = /hive/data/genomes/nasLar1/jkStuff/nasLar1.11.ooc nasLar1.lift = /hive/data/genomes/nasLar1/jkStuff/nasLar1.nonBridged.lft nasLar1.perChromTables = no nasLar1.refseq.mrna.native.pslCDnaFilter = ${ordered.refseq.mrna.native.pslCDnaFilter} nasLar1.refseq.mrna.xeno.pslCDnaFilter = ${ordered.refseq.mrna.xeno.pslCDnaFilter} nasLar1.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter} nasLar1.genbank.mrna.xeno.pslCDnaFilter = ${ordered.genbank.mrna.xeno.pslCDnaFilter} nasLar1.genbank.est.native.pslCDnaFilter = ${ordered.genbank.est.native.pslCDnaFilter} nasLar1.genbank.est.xeno.pslCDnaFilter = ${ordered.genbank.est.xeno.pslCDnaFilter} nasLar1.downloadDir = nasLar1 nasLar1.refseq.mrna.native.load = no nasLar1.refseq.mrna.native.loadDesc = no nasLar1.refseq.mrna.xeno.load = yes
nasLar1.refseq.mrna.xeno.loadDesc = yes nasLar1.genbank.mrna.native.load = no nasLar1.genbank.mrna.native.loadDesc = no nasLar1.genbank.est.native.load = no nasLar1.upstreamGeneTbl = refGene # Edit src/lib/gbGenome.c to add new species git commit -m "Added nasLar1; refs #14437" etc/genbank.conf src/lib/gbGenome.c git push # update /cluster/data/genbank/: make etc-update make install-server screen # control this business with a screen since it takes a while cd /cluster/data/genbank time ./bin/gbAlignStep -initial nasLar1 # logFile: var/build/logs/2015.01.06-10:53:22.nasLar1.initalign.log # real 53m1.297s # verify completed successfully: tail var/build/logs/2015.01.06-10:53:22.nasLar1.initalign.log # hgwdev 2015.01.06-11:43:02 nasLar1.initalign: Succeeded: nasLar1 # hgwdev 2015.01.06-11:46:23 nasLar1.initalign: finish # To re-do, rm the dir first: # /cluster/data/genbank/work/initial.nasLar1 # load database when finished ssh hgwdev cd /cluster/data/genbank time ./bin/gbDbLoadStep -drop -initialLoad nasLar1 # logFile: var/dbload/hgwdev/logs/2015.01.06-13:41:18.nasLar1.dbload.log # real 6m52.226s # enable daily alignment and update of hgwdev cd ~/kent/src/hg/makeDb/genbank git pull # add nasLar1 to: # etc/align.dbs # etc/hgwdev.dbs git commit -m "Added nasLar1 - Proboscis monkey refs #14437" etc/align.dbs etc/hgwdev.dbs git push make etc-update ######################################################################### # ucscToINSDC table/track (DONE - 2015-02-18 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/ucscToINSDC cd /hive/data/genomes/nasLar1/bed/ucscToINSDC # check for chrM grep chrM ../../*.agp # chrM 1 16570 735352 F NC_008216.1 1 16570 + # use that accession here: ~/kent/src/hg/utils/automation/ucscToINSDC.sh \ ../../genbank/GCA_000772465.1_Charlie1.0_assembly_structure/Primary_Assembly \ NC_008216.1 # fixup chr names to UCSC names: sed -e 's/chralign_Mm/chr/;' ucscToINSDC.txt | sort > ucscChrNames.txt awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes \ |
sort > name.coordinate.tab join name.coordinate.tab ucscChrNames.txt | tr '[ ]' '[\t]' \ > ucscToINSDC.bed # verify all names are translated properly: wc -l *.bed ../../chrom.sizes # 319550 ucscToINSDC.bed # 319550 ../../chrom.sizes cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1 # 20 # use the 20 in this sed sed -e "s/21/20/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | hgLoadSqlTab nasLar1 ucscToINSDC stdin ucscToINSDC.bed checkTableCoords nasLar1 # should cover %100 entirely: featureBits -countGaps nasLar1 ucscToINSDC # 3011982740 bases of 3011982740 (100.000%) in intersection ############################################################################ # BLATSERVERS ENTRY (DONE - 2015-03-20 - Hiram) # After getting a blat server assigned by the Blat Server Gods, ssh hgwdev # verify doesn't exist yet: hgsql -e 'select * from blatServers;' hgcentraltest | grep -i naslar # empty result hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("nasLar1", "blat4b", "17858", "1", "0"); \ INSERT INTO blatServers (db, host, port, isTrans, canPcr) \ VALUES ("nasLar1", "blat4b", "17859", "0", "1");' \ hgcentraltest # test it with some sequence ############################################################################ ######################################################################### # all.joiner update, downloads and in pushQ - (DONE 2015-02-18 - Hiram) cd $HOME/kent/src/hg/makeDb/schema # fixup all.joiner until this is a clean output joinerCheck -database=nasLar1 -keys all.joiner joinerCheck -database=nasLar1 -tableCoverage all.joiner joinerCheck -database=nasLar1 -times all.joiner cd /hive/data/genomes/nasLar1 time makeDownloads.pl nasLar1 > do.log 2>&1 # real 16m28.538s # now ready for pushQ entry mkdir /hive/data/genomes/nasLar1/pushQ cd /hive/data/genomes/nasLar1/pushQ makePushQSql.pl nasLar1 > nasLar1.pushQ.sql 2> stderr.out # check for errors in stderr.out, some are OK, e.g.: # WARNING: hgwdev does not have 
/gbdb/nasLar1/wib/gc5Base.wib # WARNING: hgwdev does not have /gbdb/nasLar1/wib/quality.wib # WARNING: hgwdev does not have /gbdb/nasLar1/bbi/qualityBw/quality.bw # WARNING: nasLar1 does not have seq # WARNING: nasLar1 does not have extFile # WARNING: nasLar1 does not have estOrientInfo # WARNING: nasLar1 does not have mrnaOrientInfo # copy it to hgwbeta scp -p nasLar1.pushQ.sql qateam@hgwbeta:/tmp ssh qateam@hgwbeta "./bin/x86_64/hgsql qapushq < /tmp/nasLar1.pushQ.sql" # in that pushQ entry walk through each entry and see if the # sizes will set properly ######################################################################### # augustusGene track (DONE - 28 May 2015 - Hiram) mkdir /hive/data/genomes/nasLar1/bed/augustus cd /hive/data/genomes/nasLar1/bed/augustus time (doAugustus.pl -buildDir=`pwd` -bigClusterHub=ku \ -species=human -dbHost=hgwdev \ -workhorse=hgwdev nasLar1) > do.log 2>&1 # real 363m35.928s cat fb.nasLar1.augustusGene.txt # 35116222 bases of 2398172200 (1.464%) in intersection #############################################################################