# for emacs: -*- mode: sh; -*- # This file describes browser build for the ci3 # Ciona intestinalis, this version is what is in Ensembl v81 (and previous) # there is a newer version available at NCBI # Assembly Name: KH # Organism name: Ciona intestinalis (vase tunicate) # Taxid: 7719 # Submitter: Organization: Department of Zoology, Graduate School of # Date: 2011-4-29 # Assembly type: haploid # Release type: major # Assembly level: Chromosome # Genome representation: full # GenBank Assembly Accession: GCA_000224145.1 (replaced) # RefSeq Assembly Accession: GCF_000224145.1 (replaced) # RefSeq Assembly and GenBank Assemblies Identical: no # ## Assembly-Units: ## GenBank Unit Accession RefSeq Unit Accession Assembly-Unit name ## GCA_000224155.1 GCF_000224155.1 Primary Assembly ## GCA_000224165.1 GCF_000183085.1 non-nuclear # # Ordered by chromosome/plasmid; the chromosomes/plasmids are followed by # unlocalized scaffolds. # Unplaced scaffolds are listed at the end. # RefSeq is equal or derived from GenBank object. 
#############################################################################
# fetch sequence from NCBI (DONE - 2015-08-06 - Hiram)
mkdir -p /hive/data/genomes/ci3/genbank
cd /hive/data/genomes/ci3/genbank

time rsync -L -a -P \
  rsync://ftp.ncbi.nlm.nih.gov/genomes/all/GCF_000224145.1_KH/ ./

# measure what we have here:
faSize GCF_000224145.1_KH_genomic.fna.gz
# 115227500 bases (3064662 N's 112162838 real 77549892 upper 34612946 lower)
#    in 1272 sequences in 1 files
# Total size: mean 90587.7 sd 621203.6 min 3020 (NW_004191353.1) max 10041005
#    (NC_020166.1) median 8866
# %30.04 masked total, %30.86 masked real

# chrMT sequence is: NC_004447.2 which is included

#############################################################################
# fixup to UCSC naming scheme (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/ucsc
cd /hive/data/genomes/ci3/ucsc

# chromosome-level AGP/fasta; the script prints accession -> chromosome number
time ~/kent/src/hg/makeDb/doc/ci3/ucscCompositeAgp.pl ../genbank/GCF_000224145.1_KH_assembly_structure/Primary_Assembly
# NC_020170.1 5
# NC_020177.1 12
# NC_020174.1 9
# NC_020167.1 2
# NC_020168.1 3
# NC_020173.1 8
# NC_020175.1 10
# NC_020172.1 7
# NC_020176.1 11
# NC_020169.1 4
# NC_020166.1 1
# NC_020179.1 14
# NC_020178.1 13
# NC_020171.1 6
# real 0m1.252s

time ~/kent/src/hg/makeDb/doc/ci3/unplaced.pl ../genbank/GCF_000224145.1_KH_assembly_structure/Primary_Assembly
# real 0m32.790s

# mitochondrion: rename the single sequence to chrM in both fasta and AGP
zcat ../genbank/GCF_000224145.1_KH_assembly_structure/non-nuclear/assembled_chromosomes/FASTA/chrMT.fna.gz \
  | sed -e 's/^>.*/>chrM/;' > chrM.fa
zcat ../genbank/GCF_000224145.1_KH_assembly_structure/non-nuclear/assembled_chromosomes/AGP/chrMT.comp.agp.gz \
  | grep "^NC_004447" | sed -e 's/NC_004447.2/chrM/;' > chrM.agp

gzip *.fa

# verify nothing lost compared to genbank:
faSize *.fa.gz
# 115227500 bases (3064662 N's 112162838 real 112162838 upper 0 lower)
#    in 1272 sequences in 16 files
# Total size: mean 90587.7 sd 621203.6 min 3020 (chrUn_NW_004191353v1)
#    max 10041005 (chr1) median 8866

# same numbers as above.
#############################################################################
# Initial database build (DONE - 2015-08-06 - Hiram)
cd /hive/data/genomes/ci3

# The here-document delimiter is quoted on the '<<' line so nothing in the
# body is expanded; the closing line must be the bare word _EOF_ for sh/bash
# to recognize the end of the document.
cat << '_EOF_' > ci3.config.ra
# Config parameters for makeGenomeDb.pl:
db ci3
clade deuterostome
genomeCladePriority 10
scientificName Ciona intestinalis
commonName C. intestinalis
assemblyDate Apr. 2011
assemblyLabel Kyoto KH
assemblyShortLabel Kyoto KH
orderKey 3044
# chrM bioproject: 30531
# mitoAcc NC_004447.2 already included
mitoAcc none
fastaFiles /hive/data/genomes/ci3/ucsc/*.fa.gz
agpFiles /hive/data/genomes/ci3/ucsc/*.agp.gz
# qualFiles none
dbDbSpeciesDir squirt
photoCreditURL http://www.ascidians.com/
photoCreditName Arjan Gittenberger
ncbiGenomeId 49
ncbiAssemblyId 527578
ncbiAssemblyName KH
ncbiBioProject 187185
genBankAccessionID GCF_000224145.1
taxId 7719
_EOF_
# << happy emacs

# verify sequence and AGP are OK:
time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev -fileServer=hgwdev \
  -stop=agp ci3.config.ra) > agp.log 2>&1
# real 0m15.114s

# verify end of agp.log indicates:
# *** All done!
#   (through the 'agp' step)

# then finish it off:
time (makeGenomeDb.pl -workhorse=hgwdev -dbHost=hgwdev \
  -fileServer=hgwdev -continue=db ci3.config.ra) > db.log 2>&1
# real 1m11.102s

# fixing up the trackDb construction:
/cluster/home/hiram/kent/src/hg/utils/automation/makeGenomeDb.pl \
  -workhorse=hgwdev -dbHost=hgwdev \
  -fileServer=hgwdev -continue=trackDb ci3.config.ra

# check in the trackDb files created and add to trackDb/makefile

##############################################################################
# cpgIslands on UNMASKED sequence (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/cpgIslandsUnmasked
cd /hive/data/genomes/ci3/bed/cpgIslandsUnmasked

time (doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku -buildDir=$(pwd) \
  -tableName=cpgIslandExtUnmasked \
  -maskedSeq=/hive/data/genomes/ci3/ci3.unmasked.2bit \
  -workhorse=hgwdev -smallClusterHub=ku ci3) > do.log 2>&1
# real 2m31.233s

cat fb.ci3.cpgIslandExtUnmasked.txt
# 663150 bases of 112164198 (0.591%) in intersection

#############################################################################
# cytoBandIdeo - (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/cytoBand
cd /hive/data/genomes/ci3/bed/cytoBand
makeCytoBandIdeo.csh ci3

#########################################################################
# ucscToRefSeq table/track (DONE - 2017-03-14 - Hiram)
mkdir /hive/data/genomes/ci3/bed/ucscToRefSeq
cd /hive/data/genomes/ci3/bed/ucscToRefSeq

# there is a chrM here
grep chrM ../../*.agp
# chrM 1 14790 1 O NC_004447.2 1 14790 +
# NCBI Entrez says this NC_004447.2 is AJ517314.2
# This assembly_report has *two* different MT sequences:
# one with a genbank name:
# KhM0 assembled-molecule MT Mitochondrion HT000188.1 <> na non-nuclear 14104 na
# and one with a refseq name:
# MT assembled-molecule MT Mitochondrion na <> NC_004447.2 non-nuclear 14790 na

# refseq.insdc.txt: RefSeq accession <tab> INSDC accession; the chrM pair is
# entered by hand, everything else comes from the assembly report
printf "NC_004447.2\tAJ517314.2\n" > refseq.insdc.txt
grep -v "^#" ../../refseq/GCF*_assembly_report.txt | cut -f5,7 \
  | grep -v NC_004447.2 \
  | awk '{printf "%s\t%s\n", $2, $1}' | sort >> refseq.insdc.txt

# ucscToRefSeq.tab: UCSC name <tab> RefSeq accession
printf "chrM\tNC_004447.2\n" > ucscToRefSeq.tab
grep -v "Accession.version" ../../genbank/GCF_000224145.1_KH_assembly_structure/Primary_Assembly/assembled_chromosomes/chr2acc \
  | awk '{printf "chr%d\t%s\n", $1, $2}' | sort >> ucscToRefSeq.tab
grep NW_ ../../chrom.sizes | awk '{printf "%s\t%s\n", $1, $1}' \
  | sed -e 's/chrUn_//; s/v1/.1/;' | awk '{printf "%s\t%s\n", $2, $1}' \
  | sort >> ucscToRefSeq.tab

# join through the RefSeq accession to get UCSC name <tab> INSDC accession
sort -k2 ucscToRefSeq.tab | join -1 2 - refseq.insdc.txt \
  | awk '{printf "%s\t%s\n", $2, $3}' | sort > ucscToINSDC.tab

awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes \
  | sort > ucsc.coordinate.tab

join ucscToINSDC.tab ucsc.coordinate.tab \
  | awk '{printf "%s\t%d\t%d\t%s\n", $1,$3,$4,$2}' \
  | sort > ucscToINSDC.bed

sort ucscToRefSeq.tab \
  | join - ucsc.coordinate.tab \
  | awk '{printf "%s\t%d\t%d\t%s\n", $1,$3,$4,$2}' \
  | sort > ucscToRefSeq.bed

# the second MT HT000188.1 is not matched in UCSC assembly:
wc -l *
# 1273 refseq.insdc.txt
# 1272 ucsc.coordinate.tab
# 1272 ucscToINSDC.bed
# 1272 ucscToINSDC.tab
# 1272 ucscToRefSeq.bed
# 1272 ucscToRefSeq.tab

# longest chrom name sets the key length substituted into the table schema
export chrSize=$(cut -f1 ucscToRefSeq.bed | awk '{print length($0)}' | sort -n | tail -1)
echo $chrSize
# 20
sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
  | sed -e 's/INSDC/RefSeq/g;' > ucscToRefSeq.sql
hgLoadSqlTab ci3 ucscToRefSeq ./ucscToRefSeq.sql ucscToRefSeq.bed

# and the existing ucscToINSDC is incorrect:
export chrSize=$(cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1)
echo $chrSize
# 20
sed -e "s/21/$chrSize/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \
  > ucscToINSDC.sql
hgLoadSqlTab ci3 ucscToINSDC ./ucscToINSDC.sql ucscToINSDC.bed

# checkTableCoords should be silent
checkTableCoords ci3

# each should cover 100% entirely:
featureBits -countGaps ci3 ucscToINSDC
# 115227500 bases of 115227500 (100.000%) in intersection
featureBits -countGaps ci3 ucscToRefSeq
# 115227500 bases of 115227500
(100.000%) in intersection ######################################################################### # add chromAlias table (DONE - 2017-03-14 - Hiram) # the previous chromAlias was incorrect, it was the incorrect genbank # assembly correspondence. This already has the ensembl list here. mkdir /hive/data/genomes/ci3/bed/chromAlias cd /hive/data/genomes/ci3/bed/chromAlias hgsql -N -e 'select chrom,name from ucscToRefSeq;' ci3 \ > ucsc.refseq.tab hgsql -N -e 'select chrom,name from ucscToINSDC;' ci3 \ > ucsc.genbank.tab ~/kent/src/hg/utils/automation/chromAlias.pl # verify all are correct counts: cut -f3 ci3.chromAlias.tab | tr '[,]' '[\n]' | sort | uniq -c # 1272 ensembl # 1272 genbank # 1272 refseq wc -l *.tab # 2559 ci3.chromAlias.tab # 1272 ucsc.ensembl.tab # 1272 ucsc.genbank.tab # 1272 ucsc.refseq.tab hgLoadSqlTab ci3 chromAlias ~/kent/src/hg/lib/chromAlias.sql \ ci3.chromAlias.tab ######################################################################### # ucscToINSDC table/track (DONE - 2015-07-10 - Hiram) mkdir /hive/data/genomes/ci3/bed/ucscToINSDC cd /hive/data/genomes/ci3/bed/ucscToINSDC # find the chrM accession: grep chrM ../../*.agp # chrM 1 14790 1 O NC_004447.2 1 14790 + # use that as the second argument here: ~/kent/src/hg/utils/automation/ucscToINSDC.sh \ ../../genbank/GCF_*assembly_structure/Primary_Assembly NC_004447.2 # didn't work on this odd one: mv ucscToINSDC.txt ucscToINSDC.broken sed -e 's/NWv/NW_/; s/\tNW/v1\tNW/;' ucscToINSDC.broken > ucscToINSDC.txt awk '{printf "%s\t0\t%d\n", $1,$2}' ../../chrom.sizes \ | sort > name.coordinate.tab join name.coordinate.tab ucscToINSDC.txt | tr '[ ]' '[\t]' \ > ucscToINSDC.bed # verify all names are coming through, should be same line count: wc -l * # 1272 name.coordinate.tab # 1272 ucscToINSDC.bed # 1272 ucscToINSDC.broken # 1272 ucscToINSDC.txt cut -f1 ucscToINSDC.bed | awk '{print length($0)}' | sort -n | tail -1 # 20 # use the 20 in this sed sed -e "s/21/20/" $HOME/kent/src/hg/lib/ucscToINSDC.sql \ | 
hgLoadSqlTab ci3 ucscToINSDC stdin ucscToINSDC.bed
checkTableCoords ci3
# should cover 100% entirely:
featureBits -countGaps ci3 ucscToINSDC
# 115227500 bases of 115227500 (100.000%) in intersection

#########################################################################
# fixup search rule for assembly track/gold table (DONE - 2015-07-10 - Hiram)
# Sample the first and last fragment names to see which patterns the search
# regular expression has to accept.
hgsql -N -e "select frag from gold;" ci3 | sort | head -3
# EAAA01000001.1
# EAAA01000002.1
# EAAA01000003.1

hgsql -N -e "select frag from gold;" ci3 | sort | tail -2
# EAAA01006374.1
# NC_004447.2

# candidate rule:
#   [EN][AC][A_][A0][0-9]*(\.[12])?

# verify this rule will find them all or eliminate them all:
hgsql -N -e "select frag from gold;" ci3 | wc -l
# 6374

hgsql -N -e "select frag from gold;" ci3 \
  | egrep -e '[EN][AC][A_][A0][0-9]*(\.[12])?' | wc -l
# 6374

hgsql -N -e "select frag from gold;" ci3 \
  | egrep -v -e '[EN][AC][A_][A0][0-9]*(\.[12])?' | wc -l
# 0

# hence, add to trackDb/squirt/ci3/trackDb.ra
# (squirt is the dbDbSpeciesDir given in ci3.config.ra; the stanza is
#  trackDb.ra text rather than shell, so it is kept commented out here)
#   searchTable gold
#   shortCircuit 1
#   termRegex [EN][AC][A_][A0][0-9]*(\.[12])?
#   query select chrom,chromStart,chromEnd,frag from %s where frag like '%s%%'
#   searchPriority 8

##########################################################################
# running repeat masker (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/repeatMasker
cd /hive/data/genomes/ci3/bed/repeatMasker

time (doRepeatMasker.pl -buildDir=$(pwd) \
  -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
  -smallClusterHub=ku ci3) > do.log 2>&1
# real 17m59.020s

cat faSize.rmsk.txt
# 115227500 bases (3064662 N's 112162838 real 95159873 upper
#    17002965 lower) in 1272 sequences in 1 files
# Total size: mean 90587.7 sd 621203.6 min 3020 (chrUn_NW_004191353v1)
#    max 10041005 (chr1) median 8866
# %14.76 masked total, %15.16 masked real

egrep -i "versi|relea" do.log
# RepeatMasker version open-4.0.5
# January 31 2015 (open-4-0-5) version of RepeatMasker
# CC RELEASE 20140131;

time featureBits -countGaps ci3 rmsk
# 17008343 bases of 115227500 (14.761%) in intersection
# real 0m1.517s

# why is it different than the faSize above ?
# because rmsk masks out some N's as well as bases, the count above
# separates out the N's from the bases, it doesn't show lower case N's

##########################################################################
# running simple repeat (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/simpleRepeat
cd /hive/data/genomes/ci3/bed/simpleRepeat

time (doSimpleRepeat.pl -buildDir=$(pwd) -bigClusterHub=ku \
  -dbHost=hgwdev -workhorse=hgwdev -smallClusterHub=ku \
  ci3) > do.log 2>&1
# real 7m4.597s

cat fb.simpleRepeat
# 3958962 bases of 112164198 (3.530%) in intersection

# add to rmsk after it is done:
cd /hive/data/genomes/ci3
twoBitMask ci3.rmsk.2bit \
  -add bed/simpleRepeat/trfMask.bed ci3.2bit
# you can safely ignore the warning about fields >= 13

twoBitToFa ci3.2bit stdout | faSize stdin > faSize.ci3.2bit.txt
cat faSize.ci3.2bit.txt
# NOTE(review): the figures recorded below do not match ci3, which is
# 115227500 bases in 1272 sequences in every other measurement in this
# file; they look like a leftover from another assembly's doc -- confirm
# and replace with the real output.
# 2641342258 bases (41642089 N's 2599700169 real 1445054690 upper
#    1154645479 lower) in 267625 sequences in 1 files
# Total size: mean 9869.6 sd 1168451.9 min 152 (chrUn_AANG03201581v1)
#    max 240380223 (chrA1) median 654
# %43.71 masked total, %44.41 masked real

rm /gbdb/ci3/ci3.2bit
ln -s $(pwd)/ci3.2bit /gbdb/ci3/ci3.2bit

##########################################################################
# CREATE MICROSAT TRACK (DONE - 2015-08-06 - Hiram)
ssh hgwdev
mkdir /cluster/data/ci3/bed/microsat
cd /cluster/data/ci3/bed/microsat

# keep simpleRepeat rows where column 5 is 2 or 3, column 6 is at least 15,
# column 8 is 100 and column 9 is 0; name is "<column 6>x<column 16>"
awk '($5==2 || $5==3) && $6 >= 15 && $8 == 100 && $9 == 0 {printf("%s\t%s\t%s\t%dx%s\n", $1, $2, $3, $6, $16);}' \
  ../simpleRepeat/simpleRepeat.bed > microsat.bed

hgLoadBed ci3 microsat microsat.bed
# Read 152 elements of size 4 from microsat.bed

##########################################################################
## WINDOWMASKER (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/windowMasker
cd /hive/data/genomes/ci3/bed/windowMasker

time (doWindowMasker.pl -buildDir=$(pwd) -workhorse=hgwdev \
  -dbHost=hgwdev ci3) > do.log 2>&1
# real 5m11.115s

# Masking statistics
cat faSize.ci3.cleanWMSdust.txt
# 115227500 bases (3064662 N's 112162838 real 76632395 upper
#    35530443 lower) in 1272 sequences in 1 files
# Total size: mean 90587.7 sd 621203.6 min 3020 (chrUn_NW_004191353v1)
#    max 10041005 (chr1) median 8866
# %30.84 masked total, %31.68 masked real

cat fb.ci3.rmsk.windowmaskerSdust.txt
# 13034836 bases of 115227500 (11.312%) in intersection

# using this Window Masker result for final masking::
cd /hive/data/genomes/ci3
# you can safely ignore the warning about fields >= 13
twoBitMask bed/windowMasker/ci3.cleanWMSdust.2bit \
  -add bed/simpleRepeat/trfMask.bed ci3.2bit

# measure the final masking:
twoBitToFa ci3.2bit stdout | faSize stdin > faSize.ci3.2bit.txt
cat faSize.ci3.2bit.txt
# 115227500 bases (3064662 N's 112162838 real 76616873 upper
#    35545965 lower) in 1272 sequences in 1 files
# Total size: mean 90587.7 sd 621203.6 min 3020 (chrUn_NW_004191353v1)
#    max 10041005 (chr1) median 8866
# %30.85 masked total, %31.69 masked real

# and reset the symlink
rm /gbdb/ci3/ci3.2bit
ln -s /hive/data/genomes/ci3/ci3.2bit /gbdb/ci3/ci3.2bit

##########################################################################
# cpgIslands - (DONE - 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/cpgIslands
cd /hive/data/genomes/ci3/bed/cpgIslands

time doCpgIslands.pl -dbHost=hgwdev -bigClusterHub=ku \
  -workhorse=hgwdev -smallClusterHub=ku ci3 > do.log 2>&1
# real 2m31.639s

cat fb.ci3.cpgIslandExt.txt
# 493798 bases of 112164198 (0.440%) in intersection

##############################################################################
# genscan - (DONE 2015-08-06 - Hiram)
mkdir /hive/data/genomes/ci3/bed/genscan
cd /hive/data/genomes/ci3/bed/genscan

time (doGenscan.pl -buildDir=$(pwd) -workhorse=hgwdev -dbHost=hgwdev \
  -bigClusterHub=ku ci3) > do.log 2>&1
# real 7m40.961s

cat fb.ci3.genscan.txt
# 9423651 bases of 112164198 (8.402%) in intersection

cat fb.ci3.genscanSubopt.txt
# 2728824 bases of 112164198 (2.433%) in intersection
########################################################################
# Create kluster run files (DONE - 2015-08-25 - Hiram)

# The -repMatch value is the human threshold (1024) scaled by the ratio of
# this genome's gapless size to the hg19 gapless size.

# numerator is ci3 gapless bases "real" as reported by:
head -1 faSize.ci3.2bit.txt
# 115227500 bases (3064662 N's 112162838 real 76616873 upper 35545965 lower)
#    in 1272 sequences in 1 files

# denominator is hg19 gapless bases as reported by:
#   featureBits -noRandom -noHap hg19 gap
#     234344806 bases of 2861349177 (8.190%) in intersection
# 1024 is threshold used for human -repMatch:
calc \( 112162838 / 2861349177 \) \* 1024
# ( 112162838 / 2861349177 ) * 1024 = 40.140066

# ==> use -repMatch=100 since the smaller '50' number would count too many
cd /hive/data/genomes/ci3
blat ci3.2bit \
  /dev/null /dev/null -tileSize=11 -makeOoc=jkStuff/ci3.11.ooc \
  -repMatch=100
# Wrote 6681 overused 11-mers to jkStuff/ci3.11.ooc

# There are no non-bridged gaps
hgsql -N \
  -e 'select * from gap where bridge="no" order by size;' ci3 \
  | sort -k7,7nr

# NOTE(review): the remarks below about gap sizes and liftOver row counts
# contradict the "no non-bridged gaps" statement above and look carried
# over from another assembly's doc -- confirm before relying on them.
# most non-bridged gaps have size = 5000, follow pig's example (most
# 100, but use 5000)
# decide on a minimum gap for this break, use either 100 or 5000 will
# generate 13387 liftOver rows, but if use 6000, only got 11703 rows.
# so use 100 here to get more liftOver row.
# the database argument must be this assembly (ci3); the lift and bed files
# are written into this assembly's jkStuff directory
gapToLift -verbose=2 -minGap=100 ci3 jkStuff/nonBridged.lft \
  -bedFile=jkStuff/nonBridged.bed

# check N50 size
n50.pl chrom.sizes
# reading: chrom.sizes
# contig count: 1272, total size: 115227500, one half size: 57613750
# cumulative N50 count contig contig size
# 54476032 8 chr12 5356124
# 57613750 one half size
# 59628933 9 chr11 5152901

########################################################################
# GENBANK AUTO UPDATE (DONE - 2015-08-26 - Hiram)
ssh hgwdev
cd $HOME/kent/src/hg/makeDb/genbank
git pull
# /cluster/data/genbank/data/organism.lst shows:
# #organism mrnaCnt estCnt refSeqCnt
# Ciona intestinalis 8186 1205674 948
# Ciona savignyi 106 84302 0

# edit etc/genbank.conf to add ci3 just after ci2

# ci3 (ciona intestinalis)
ci3.serverGenome = /hive/data/genomes/ci3/ci3.2bit
ci3.clusterGenome = /hive/data/genomes/ci3/ci3.2bit
ci3.ooc = /hive/data/genomes/ci3/jkStuff/ci3.11.ooc
ci3.maxIntron = 20000
ci3.lift = no
ci3.refseq.mrna.native.pslCDnaFilter = ${ordered.refseq.mrna.native.pslCDnaFilter}
ci3.refseq.mrna.xeno.pslCDnaFilter = ${ordered.refseq.mrna.xeno.pslCDnaFilter}
ci3.genbank.mrna.native.pslCDnaFilter = ${ordered.genbank.mrna.native.pslCDnaFilter}
ci3.genbank.mrna.xeno.pslCDnaFilter = ${ordered.genbank.mrna.xeno.pslCDnaFilter}
ci3.genbank.est.native.pslCDnaFilter = ${ordered.genbank.est.native.pslCDnaFilter}
# refseq.mrna native and xeno are default yes
# genbank.mrna and genbank.est native are default yes, the xeno is default no
ci3.downloadDir = ci3
ci3.perChromTables = no

git commit -m "Added ci3 Ciona intestnalis; refs #15796" etc/genbank.conf
git push
# update /cluster/data/genbank/:
make etc-update

# Edit src/lib/gbGenome.c to add new species.
#   Skipped

screen      # control this business with a screen since it takes a while
cd /cluster/data/genbank

time ./bin/gbAlignStep -initial ci3
# logFile: var/build/logs/2015.08.24-10:47:11.ci3.initalign.log
# real 87m30.071s

# To re-do, rm the dir first:
# /cluster/data/genbank/work/initial.ci3

# to avoid failed load:
cd /cluster/data/genbank/data/aligned/refseq.71/ci3
# find file with illegal name:
grep AC_ */*xeno*
# daily.2015.0725/mrna.xeno.alidx:AC_000192 1 0
# remove that line from that file

# load database when finished
ssh hgwdev
cd /cluster/data/genbank
time ./bin/gbDbLoadStep -drop -initialLoad ci3
# real 20m9.391s
# real 23m11.113s
# logFile: var/dbload/hgwdev/logs/2015.08.26-10:40:23.ci3.dbload.log

# enable daily alignment and update of hgwdev
cd ~/kent/src/hg/makeDb/genbank
git pull
# add ci3 to:
# etc/align.dbs
# etc/hgwdev.dbs
git commit -m "Added ci3 - Ciona intestinalis refs #15796" etc/align.dbs etc/hgwdev.dbs
git push
make etc-update

#########################################################################
# LIFTOVER TO ci2 (DONE - 2015-08-26 - Hiram )
mkdir /hive/data/genomes/ci3/bed/blat.ci2.2015-08-26
cd /hive/data/genomes/ci3/bed/blat.ci2.2015-08-26
# -debug run to create run dir, preview scripts...
doSameSpeciesLiftOver.pl -buildDir=$(pwd) \
  -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
  -ooc=/hive/data/genomes/ci3/jkStuff/ci3.11.ooc -debug ci3 ci2
# Real run:
time (doSameSpeciesLiftOver.pl -buildDir=$(pwd) \
  -bigClusterHub=ku -dbHost=hgwdev -workhorse=hgwdev \
  -ooc=/hive/data/genomes/ci3/jkStuff/ci3.11.ooc ci3 ci2) > do.log 2>&1
# real 19m1.617s

# verify it works on genome-test

#############################################################################
# all.joiner update, downloads and in pushQ - (done 2016-01-08 - Hiram)
cd $HOME/kent/src/hg/makeDb/schema
# fixup all.joiner until this is a clean output
joinerCheck -database=ci3 -keys all.joiner
joinerCheck -database=ci3 -tableCoverage all.joiner
joinerCheck -database=ci3 -times all.joiner

cd /hive/data/genomes/ci3
makeDownloads.pl ci3 > downloads.log 2>&1

# now ready for pushQ entry
mkdir /hive/data/genomes/ci3/pushQ
cd /hive/data/genomes/ci3/pushQ
makePushQSql.pl ci3 > ci3.pushQ.sql 2> stderr.out
# check for errors in stderr.out, some are OK, e.g.:
# WARNING: hgwdev does not have /gbdb/ci3/wib/gc5Base.wib
# WARNING: hgwdev does not have /gbdb/ci3/wib/quality.wib
# WARNING: hgwdev does not have /gbdb/ci3/bbi/quality.bw
# WARNING: ci3 does not have seq
# WARNING: ci3 does not have extFile

# copy it to hgwbeta
scp -p ci3.pushQ.sql qateam@hgwbeta:/tmp
# the quoted string is the whole remote command, so the input redirection
# is performed on hgwbeta where the copied file lives
ssh qateam@hgwbeta "./bin/x86_64/hgsql qapushq < /tmp/ci3.pushQ.sql"

# in that pushQ entry walk through each entry and see if the
# sizes will set properly

#########################################################################
# BLATSERVERS ENTRY (DONE - 2017-05-09 - Hiram)
# After getting a blat server assigned by the Blat Server Gods,
ssh hgwdev

hgsql -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  VALUES ("ci3", "blat1a", "17828", "1", "0"); \
  INSERT INTO blatServers (db, host, port, isTrans, canPcr) \
  VALUES ("ci3", "blat1a", "17829", "0", "1");' \
  hgcentraltest
# test it with some sequence
############################################################################