#!/bin/tcsh -efx
# Script to create a relational version of the UniProt database.  Should be
# run on hgwdev.
#
# Interpreter flags: -e exit as soon as a command fails, -f do not read the
# startup files, -x echo every command before running it.

# NOTE: to build a new release, change only DBDATE below.  The working
# directory and the DB name are derived from it, so the release date never
# has to be substituted by hand further down.
set DBDATE=140122
set DB=sp$DBDATE

# Locations used more than once below.
set buildDir=/hive/data/outside/uniProt/$DBDATE/build
set ftpDir=ftp://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete

# Set up working directory
mkdir -p ${buildDir}

# Download uniProt.  This was estimated at about 12 hours; the TrEMBL
# transfer recorded below took 16h 31m on its own.
cd ${buildDir}
wget ${ftpDir}/uniprot_sprot.dat.gz
#	486,223,605  262K/s   in 28m 26s
wget ${ftpDir}/uniprot_trembl.dat.gz
#	22,811,969,157 6.58M/s   in 16h 31m
wget ${ftpDir}/uniprot_sprot_varsplic.fasta.gz
#	7,393,531   1.66M/s   in 8.7s

# Turn flat file into relational tab-separated files.
# The glob picks up both .dat.gz downloads (the varsplic file is .fasta.gz
# and is not matched); output goes to the tabFiles directory next to build/.
time zcat *.dat.gz | spToDb stdin ../tabFiles
#	real    32m21.930s

# Create the database.
# NOTE(review): the command below is cut off in this view -- the source of
# the redirected input is not visible here, so it is left exactly as found.
# DB is not referenced in the visible part of the script; presumably it is
# used from this point on -- confirm against the full file.
hgsql mm10 <