# GWAS from the COVID-19 Host Genetics Initiative (HGI)
# (2020-07-02 kate)

# From: covid19hg.org/results
# Contacts: Rachel Liao, Juha Karjalainen (Broad) juha.karjalainen@helsinki.fi

# Create build dir
cd /hive/data/outside/covid/covidHostGenetics

# GWAS meta-analyses file format

1       #CHR    chromosome
2       POS     chromosome position in build 37
3       REF     non-effect allele
4       ALT     effect allele (beta is for this allele)
5       SNP     #CHR:POS:REF:ALT
        {STUDY}_AF_Allele2      allele frequency in {STUDY}
        {STUDY}_AF_fc           allele frequency in {STUDY} / allele frequency in gnomAD v3 (1000000 if frequency in gnomAD is 0). Calculated based on each study's ancestry in gnomAD
        {STUDY}_N
6 + (X = #studies * 3)  all_meta_N      number of studies that had the variant after AF and INFO filtering and as such were used for the meta
7 + X   all_inv_var_meta_beta   effect size on log(OR) scale
8 + X   all_inv_var_meta_sebeta standard error of effect size
9 + X   all_inv_var_meta_p      p-value
10 + X  all_inv_var_het_p       p-value from Cochran's Q heterogeneity test

# additional columns:
11 + X  "all_meta_sample_N"
12 + X  "all_meta_AF"
13 + X  "rsid"

# additional for hg19 liftover.  Values in hg38.
14 + X  "anew_chr"
15 + X  "anew_pos"
16 + X  "REF.1"
17 + X  "ALT.1"

# Studies

1   Genetic determinants of COVID-19 complications in the Brazilian population   BRACOVID
2   Genetic modifiers for COVID-19 related illness   BelCovid
3   deCODE   DECODE
4   FinnGen   FinnGen
5   GEN-COVID, reCOVID   GENCOVID
6   UK 100,000 Genomes Project   genomicsengland100kgp_EUR
7   Genes & Health   GNH
8   Generation Scotland   GS
9   COVID19-Host(a)ge   HOSTAGE
10  Helix Exome+ COVID-19 Phenotypes   Helix
11  UK Blood Donors Cohort   INTERVAL
12  LifeLines CytoSNP   LifelinesCyto
13  LifeLines Global Screening Array   LifelinesGsa
14  Netherlands Twin Register   NTR
15  Partners Healthcare Biobank   PHBB
16  Qatar Genome Program   QGP
17  UK Biobank   UKBB

#####################
# Consult with Ana on Aug 12

1. Restrict to analyses with enough power (based on plots):
     B2: hospitalized covid vs.
population (3199 cases, 8 studies)
     C2: covid vs. population (6696 cases, 18 studies)
2. Label options: rsID and/or ALT/REF (use ALT/REF if no label)
3. Mouseover: pValue, effect size, #studies
4. Filters: pValue (default=5), #studies,

######################
# Make with new .as (input from Juha), and hg38 tracks
# (2020-09-04 kate)

# Analysis B2: hospitalized covid vs. population
#       cases: 3199
#       studies: 8

# Analysis C2: covid vs. population
#       cases: 6696
#       studies: 18

# download hg19 files
wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_B2_V2_20200701.b37.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200619/results/build_37/COVID19_HGI_ANA_C2_V2_20200701.b37.txt.gz

# download hg38 files
wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_B2_V2_20200701.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200619/results/COVID19_HGI_ANA_C2_V2_20200701.txt.gz

# uncompress the downloads (they are named *.txt.gz, so the glob must be *.gz)
gunzip *.gz

wc -l covidHgiGwas*.hg38.txt
 15392647 covidHgiGwas.B2.hg38.txt
 24600933 covidHgiGwas.C2.hg38.txt

# rename
ln -s COVID19_HGI_ANA_B2_V2_20200701.txt covidHgiGwas.B2.hg38.txt
ln -s COVID19_HGI_ANA_C2_V2_20200701.txt covidHgiGwas.C2.hg38.txt
ln -s COVID19_HGI_ANA_B2_V2_20200701.b37.txt covidHgiGwas.B2.hg19.txt
ln -s COVID19_HGI_ANA_C2_V2_20200701.b37.txt covidHgiGwas.C2.hg19.txt

wc -l covidHgiGwas*
 15392647 covidHgiGwas.B2.hg38.txt
 24600933 covidHgiGwas.C2.hg38.txt

mkdir -p /gbdb/hg19/covidHgiGwas /gbdb/hg38/covidHgiGwas

# Build one bigBed per analysis (B2, C2) and assembly (hg19, hg38)
cat > makeHgi.csh << 'EOF'
set bin = ~/kent/src/hg/makeDb/outside/covid
foreach d (B2.hg19 B2.hg38 C2.hg19 C2.hg38)
    # split "analysis.db": :e is the extension (assembly), :r the root (analysis)
    set db = $d:e
    set a = $d:r
    set sizes = /hive/data/genomes/$db/chrom.sizes
    set b = covidHgiGwas
    set f = $b.$a.$db
    set bb = $b$a.$db
    echo $b.$a.$db
    # number of studies contributing to each analysis
    if ($a == "B2") then
        set studies = 8
    else
        set studies = 18
    endif
    perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt > $f.bed
    bedSort $f.bed $f.sorted.bed
    bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $bb.bb
    # link into the gbdb directory of the matching assembly (not always hg19)
    ln -s `pwd`/$bb.bb /gbdb/$db/covidHgiGwas
end
'EOF'

csh makeHgi.csh >&! makeHgi.out &

##################
# Get effect size distribution to inform color gradient selection

% textHistogram -real -minVal=-25 -binSize=1 -maxBinCount=50 effectSizes.txt
-13.000000  1
-12.000000  1
-11.000000  0
-10.000000  2
-9.000000  3
-8.000000  6
-7.000000  6
-6.000000 * 19
-5.000000 *** 53
-4.000000 ********************************************************* 944
-3.000000 ************************************************************ 1000
-2.000000 ************************************************************ 1000
-1.000000 ************************************************************ 1000
0.000000 ************************************************************ 1001
1.000000 ************************************************************ 1000
2.000000 ************************************************************ 1000
3.000000 ************************************************************ 1000
4.000000 ************************************************************ 994
5.000000 ********************************************************** 971
6.000000 **************************************************** 866
7.000000 ******************************************* 717
8.000000 ****************************** 504
9.000000 ********************** 371
10.000000 *********** 183
11.000000 ***** 78
12.000000 *** 43
13.000000 ** 37
14.000000 * 13
15.000000  4
16.000000  6
17.000000  2
18.000000  0
19.000000  0
20.000000  1

##################################
# Release 4 (October 2020)
# Dec. 2020 Kate

#23 contributors world-wide, including 23 and me
#14 European contributors in 23andme leave-out analysis

# download, all leaving out 23andme (to allow full download)
# using 4 analyses this time, as per Ana recommendation (enough power now in A2 and C1)

# Note the analysis methods have changed slightly.  Effect size ranges are much smaller.
B2: Hospitalized Covid vs population
    Cases: 8638 - 23andme (613+140) = 7885
    Controls: 1736547 - 23andme (680416+94327) = 961804

C2: Covid vs population
    Cases: 30937 - 23andme (506+9913+2553) = 17965
    Controls: 1471815 - 23andme (3110+85072+13086) = 1370547

A2: Very severe respiratory confirmed covid vs population
    Cases: 4933 - 23andme (495 + 102) = 4336
    Controls: 1398672 - 23andme (680440 + 94330) = 623902

C1: Covid vs. lab/self-reported negative
    Cases: 24057 - 23andme (506 + 9913 + 2553) = 11085
    Controls: 218062 - 23andme (85072 + 13086 + 3110) = 116794
    Studies: 23 - 23andme (3) = 20

# hg38
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_B2_ALL_leave_23andme_20201020.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_C2_ALL_leave_23andme_20201020.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_A2_ALL_leave_23andme_20201020.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_C1_ALL_leave_23andme_20201020.txt.gz

# hg19
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_B2_ALL_leave_23andme_20201020.b37.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_C2_ALL_leave_23andme_20201020.b37.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_A2_ALL_leave_23andme_20201020.b37.txt.gz
wget https://storage.googleapis.com/covid19-hg-public/20200915/results/20201020/COVID19_HGI_C1_ALL_leave_23andme_20201020.b37.txt.gz

# uncompress the downloads (they are named *.txt.gz, so the glob must be *.gz)
gunzip *.gz

# rename
ln -s COVID19_HGI_B2_ALL_leave_23andme_20201020.b37.txt covidHgiGwasR4.B2.hg19.txt
ln -s COVID19_HGI_B2_ALL_leave_23andme_20201020.txt covidHgiGwasR4.B2.hg38.txt
ln -s COVID19_HGI_C2_ALL_leave_23andme_20201020.b37.txt covidHgiGwasR4.C2.hg19.txt
ln -s COVID19_HGI_C2_ALL_leave_23andme_20201020.txt covidHgiGwasR4.C2.hg38.txt
ln -s COVID19_HGI_A2_ALL_leave_23andme_20201020.txt covidHgiGwasR4.A2.hg38.txt
ln -s COVID19_HGI_A2_ALL_leave_23andme_20201020.b37.txt covidHgiGwasR4.A2.hg19.txt
ln -s COVID19_HGI_C1_ALL_leave_23andme_20201020.txt covidHgiGwasR4.C1.hg38.txt
ln -s COVID19_HGI_C1_ALL_leave_23andme_20201020.b37.txt covidHgiGwasR4.C1.hg19.txt

wc -l covidHgiGwas*.txt

# previous (release 3) track
#15392647 covidHgiGwas.B2.hg38.txt      # 15M
#24600933 covidHgiGwas.C2.hg38.txt

 11830413 covidHgiGwasR4.A2.hg19.txt
 11846556 covidHgiGwasR4.A2.hg38.txt
 11434012 covidHgiGwasR4.B2.hg19.txt
 11449434 covidHgiGwasR4.B2.hg38.txt    # 11M
 13060711 covidHgiGwasR4.C1.hg19.txt
 13075183 covidHgiGwasR4.C1.hg38.txt
 12706061 covidHgiGwasR4.C2.hg19.txt
 12722892 covidHgiGwasR4.C2.hg38.txt

# Note more filtered than release 3 data

# Columns
#
# head -2 covidHgiGwasR4.B2.hg19.txt

CHR POS REF ALT SNP all_meta_N all_inv_var_meta_beta all_inv_var_meta_sebeta all_inv_var_meta_p all_inv_var_het_p all_meta_sample_N all_meta_AF rsid

# 13 fields

# These are same as Release 3, except:
#       1. hg19 lacks trailing columns from lift
#       2.
#          there are no study-specific values (between SNP and all_meta_N)

# determine range of effect size for filter settings, excluding low significance (log pvalue<3 items)

# parse and generate bigBed files

# Build two bigBeds per analysis and assembly: one colored by pvalue, one (.EC) colored by effect size
cat > makeHgi.csh << 'EOF'
set bin = ~/kent/src/hg/makeDb/outside/covid
foreach d (B2.hg19 B2.hg38 C2.hg19 C2.hg38 A2.hg19 A2.hg38 C1.hg19 C1.hg38)
    # split "analysis.db": :e is the extension (assembly), :r the root (analysis)
    set db = $d:e
    set a = $d:r
    set sizes = /hive/data/genomes/$db/chrom.sizes
    set b = covidHgiGwasR4
    set f = $b.$a.$db
    set bb = $b$a.$db
    echo $b.$a.$db
    # number of studies contributing to each analysis
    if ($a == "B2") then
        set studies = 21
    else if ($a == "C2") then
        set studies = 33
    else if ($a == "A2") then
        set studies = 12
    else if ($a == "C1") then
        set studies = 20
    endif

    # colored by pvalue (effect size determines lolly height)
    # NOTE: this track will not be on RR (perhaps will host on genome-test for comparison with
    # earlier data)
    perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt R4 > $f.bed
    bedSort $f.bed $f.sorted.bed
    bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $bb.bb
    ln -s `pwd`/$bb.bb /gbdb/$db/covidHgiGwas

    # colored by effect size (pvalue determines lolly height)
    #perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt R4 .025 > $f.EC.bed
    # Use medians: positive threshold .178, negative threshold .119
    # NOTE(review): the values passed below (.066 .115) equal the C2 medians listed in
    # the getEffectSizes output, not the .178/.119 quoted above -- confirm which was intended
    perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt R4 .066 .115 > $f.EC.bed
    bedSort $f.EC.bed $f.sorted.EC.bed
    bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.sorted.EC.bed $sizes $bb.EC.bb
    ln -s `pwd`/$bb.EC.bb /gbdb/$db/covidHgiGwas
end
'EOF'

# NOTE(review): the run step was not recorded in these notes; added because the
# effect-size step that follows reads the .EC.bed files this script writes
csh makeHgi.csh >&! makeHgi.out

# At Ana's suggestion, remake track with per-track effect size coloring thresholds

# Report, per analysis, the median of the negative-effect and positive-effect items
cat >getEffectSizes.csh << 'EOF'
foreach a (A2 B2 C1 C2)
    echo -n "$a\t"
    # keep items whose column 13 is >= 3.0 (presumably the log pvalue cutoff
    # mentioned in the notes -- confirm against covidHgiGwas.as)
    awk '$13 >= 3.0 {print}' covidHgiGwasR4.$a.hg38.EC.bed > $a.highPvalues.bed
    # split on the sign of column 10 and collect column 21, sorted numerically
    awk '$10 < 0 {print $21}' $a.highPvalues.bed | sort -n > negEffects.$a.txt
    awk '$10 > 0 {print $21}' $a.highPvalues.bed | sort -n > posEffects.$a.txt
    set neg = `ave negEffects.$a.txt | grep median | awk '{print $2}'`
    set pos = `ave posEffects.$a.txt | grep median | awk '{print $2}'`
    echo "$neg\t$pos"
end
'EOF'

# NOTE(review): the run step was not recorded in these notes; added so that the
# output file shown below is actually produced
csh getEffectSizes.csh >&! getEffectSizes.out

cat getEffectSizes.out
A2 0.130000 0.276000
B2 0.118000 0.206000
C1 0.094000 0.169000
C2 0.066000 0.115000

# Rebuild the effect-size-colored bigBeds using each analysis's own median thresholds
cat > reColor.csh << 'EOF'
set bin = ~/kent/src/hg/makeDb/outside/covid
foreach d (A2.hg38 B2.hg38 C1.hg38 C2.hg38 C2.hg19 A2.hg19 C1.hg19 B2.hg19 )
    # split "analysis.db": :e is the extension (assembly), :r the root (analysis)
    set db = $d:e
    set a = $d:r
    set sizes = /hive/data/genomes/$db/chrom.sizes
    set b = covidHgiGwasR4
    set f = $b.$a.$db
    set bb = $b.$a.$db
    echo $b.$a.$db
    if ($a == "A2") then
        set studies = 12
        # median negative and positive effect sizes
        set neg = 0.130000; set pos = 0.276000
    else if ($a == "B2") then
        set studies = 21
        set neg = 0.118000 ; set pos = 0.206000
    else if ($a == "C1") then
        set studies = 20
        set neg = 0.094000; set pos = 0.169000
    else if ($a == "C2") then
        set studies = 33
        set neg = 0.066000; set pos = 0.115000
    endif

    # colored by pvalue (effect size determines lolly height)
    #perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt R4 > $f.beta.bed
    #bedSort $f.beta.bed $f.beta.sorted.bed
    #bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.beta.sorted.bed $sizes $bb.beta.bb
    #ln -s `pwd`/$bb.beta.bb /gbdb/$db/covidHgiGwas

    # colored by effect size (pvalue determines lolly height)
    perl $bin/makeCovidHgiGwas.pl $db $studies $f.txt R4 $neg $pos > $f.bed
    bedSort $f.bed $f.sorted.bed
    bedToBigBed -type=bed9+12 -as=$bin/covidHgiGwas.as -tab $f.sorted.bed $sizes $bb.bb
    ln -s `pwd`/$bb.bb /gbdb/$db/covidHgiGwas
end
'EOF'

csh reColor.csh >&! reColor.out &
tail -100f reColor.out