# Wed Sep 6 16:09:32 PDT 2017 - Max
#
# This is a hacky pipeline for a very small database. SNPedia has tens of
# thousands of pages, but most have been auto-generated. These scripts try
# their best to only show SNPs where something has been manually typed into
# the SNPedia page.
#
# Steps: start from the SNPedia GFF file, use the SNPedia API (which we can
# hammer a bit, they're not picky about that) to get the wikimedia markup, use
# that to reduce the number of pages, then download their html (because
# wikimedia markup is very hard to translate to html), fix the positions for
# hg19 and hg38 and load up everything.
#
# All relative paths below (build/..., snpediaAll.*.bb) are relative to the
# hg19 snpedia build directory entered by the first "cd".

cd /hive/data/genomes/hg19/bed/snpedia
SRC=~/kent/src/hg/makeDb/scripts/snpedia

wget http://www.snpedia.com/files/gbrowse/SNPedia.gff

# Per-assembly output directories plus the page cache. build/hg19 must exist
# because the load steps below read build/hg19/snpedia.bed etc.
mkdir -p build/hg38 build/hg19 build/pages

# download all pages from the GFF file in wikimedia format
python "$SRC/download.py" > build/download.log

# remove all pages without any visible typed text
python "$SRC/filterPages.py" > build/goodPages.txt

# fix the positions using our own snp150 tracks
python "$SRC/getPos.py" > build/getPos.log

# get the html of the final list
python "$SRC/downloadHtml.py"

# strip down the html and create a bed and the htmlTab files
# (hg38 is only attempted if hg19 succeeded; both runs use the script in $SRC,
# since the current directory is the build directory, not the script directory)
python "$SRC/makeBed.py" hg19 && python "$SRC/makeBed.py" hg38

# also make bigBed file with the GFF contents, but fix the positions
python "$SRC/makeAll.py" # takes 5 hours, stupid fgrep

# load up everything
hgLoadBed hg19 snpediaText build/hg19/snpedia.bed -allowStartEqualEnd
hgLoadBed hg38 snpediaText build/hg38/snpedia.bed -allowStartEqualEnd
hgLoadSqlTab hg19 snpediaTextHtml "$SRC/snpediaHtml.sql" build/hg19/snpedia.htmlTab
hgLoadSqlTab hg38 snpediaTextHtml "$SRC/snpediaHtml.sql" build/hg38/snpedia.htmlTab

# sort each snpAll.bed in place, then convert it to bigBed with an index on
# the name field
bedSort build/hg19/snpAll.bed build/hg19/snpAll.bed
bedToBigBed build/hg19/snpAll.bed /scratch/data/hg19/chrom.sizes \
  -type=bed9+ -tab -as="$SRC/bed4Note.as" snpediaAll.hg19.bb -extraIndex=name
bedSort build/hg38/snpAll.bed build/hg38/snpAll.bed
bedToBigBed build/hg38/snpAll.bed /scratch/data/hg38/chrom.sizes \
  -type=bed9+ -tab -as="$SRC/bed4Note.as" snpediaAll.hg38.bb -extraIndex=name

# link the bigBed files into /gbdb; both assemblies are built in this one
# (hg19) directory, so both links point here
ln -s "$(pwd)/snpediaAll.hg19.bb" /gbdb/hg19/bbi/snpediaAll.bb
ln -s "$(pwd)/snpediaAll.hg38.bb" /gbdb/hg38/bbi/snpediaAll.bb

# record the data version in hgFixed.trackVersion for both assemblies
hgsql hgFixed -e "insert into trackVersion values (NULL, 'hg38', 'snpedia', 'max', 'Downloaded April 15, 2017, using dbSNP 150', now(), 'manual run', 'snpedia website', '');"
hgsql hgFixed -e "insert into trackVersion values (NULL, 'hg19', 'snpedia', 'max', 'Downloaded April 15, 2017, using dbSNP 150', now(), 'manual run', 'snpedia website', '');"