# db: hg38 # inputFile: input/hgvs/clinVarHgvs.txt # These are some HGVS-mostly terms taken from ClinVar. In order to test conversion to genomic # coordinates, I used Mutalyzer's web API to get coords for the ClinVar c. expressions using # this command in lib/tests/: # #foreach t (`grep -v ^# input/hgvs/clinVarHgvs.txt | grep -F c.`) # set tEnc = `echo -n "$t" | cgiEncode.pl` # curl -s "https://mutalyzer.nl/json/numberConversion?build=hg38&variant=$tEnc" \ # | perl -wpe 'if (m/^\["NC_0+(\d+)\.\d+:g\.([\d_]+).*/) { \ # $c = $1; $p = $2; \ # if ($p =~ m/(\d+)_(\d+)/) { \ # $s = $1 - 1; $e = $2; \ # } else { \ # $s = $p - 1; $e = $p; \ # } $_ = "chr$c\t$s\t$e\t'"$t"'\n"; }' # sleep 2 #end # There was a glitch which I repeated manually and a couple terms that Mutalyzer didn't # like as-is, so I used modified terms to get genomic coords from Mutalyzer. chr1 9264353 9264353 NM_004285.3(H6PD):c.1860_1861insACAGGTGGTTGACCTGTGGCCGGGTCTGA 0 + chr1 40258320 40258321 NM_005857.4(ZMPSTE24):c.50delA 0 + chr1 27549570 27549587 NM_001029882.3(AHDC1):c.2529_2545del17 0 - chr1 40258324 40258325 NM_005857.4(ZMPSTE24):c.54dupT 0 + chr1 17028689 17028693 NM_003000.2(SDHB):c.330_333dupTCTA 0 - chr1 11801333 11801372 NM_005957.4(MTHFR):c.264_302dup39 0 - chr1 10629865 10629867 NM_004565.2(PEX14):c.1013_1014delACinsGT 0 + chr1 40092119 40092136 NM_000310.3(PPT1):c.271_287del17insTT 0 - chr1 23796488 23796489 NM_001008216.1(GALE):c.873+20C>T 0 - chr1 11256193 11256194 NM_004958.3(MTOR):c.505-2A>G 0 - chr1 11966984 11966985 NM_000302.3(PLOD1):c.1651-2delA 0 + chr1 10258485 10258487 NM_015074.3(KIF1B):c.184-7_184-6delTT 0 + chr1 10267565 10267566 NM_015074.3(KIF1B):c.608+8dupA 0 + chr1 197122045 197122046 NM_018136.4(ASPM):c.3742-3dup 0 - chr1 17027878 17027881 NM_003000.2(SDHB):c.424-16_424-14dupTTC 0 - chr1 11134341 11134343 NM_004958.3(MTOR):c.5246+8_5246+9delCCinsGT 0 - chr1 17023966 17023973 NM_003000.2(SDHB):c.642_642+6delGGTGAGG 0 - chr1 231573657 231677388 NM_001164556.1(DISC1):c.-53210_67+50454dup 0 + chr1 40257002 40257003 NM_005857.4(ZMPSTE24):c.-211-1058C>G 0 + chr1 11981954 11981955 NM_014874.3(MFN2):c.-149-15T>G 0 + chr1 11803155 11803157 NM_005957.4(MTHFR):c.-13-28_-13-27delCT 0 - chr1 7961867 7961885 NM_007262.4(PARK7):c.-24+75_-24+92dup 0 + chr1 40073531 40073535 NM_000310.3(PPT1):c.*526_*529delATCA 0 - chr1 11022736 11022737 NM_007375.3(TARDBP):c.*83T>C 0 + chr1 11846007 11846009 NM_006172.3(NPPA):c.456_*1delAA 0 - chr19 18786046 18786049 NM_000095.2(COMP):c.1405_1407GAC[7] 0 - # # 8/25/17 -- Temporarily commenting out test case due to hgwdev vs hgwbeta ncbiRefSeqPsl diffs: # #NM_000218.2(KCNQ1):c.1071G>Y chr11 14968068 14968072 NM_001033952.2(CALCA):c.*727_*730T[4] 0 - chr14 50628429 50628430 NM_001127713.1(ATL1):c.1519dupA 0 + chrX 18564525 18564527 NM_003159.2(CDKL5):c.145+4_145+5AT[15] 0 + # This one actually falls on a part of the transcript that is not aligned to hg38! # NM_003000.2(SDHB):c.*159_*184delinsGAACCTGTTCCTTTACTTGCCCCAA # Mutalyzer could not parse this one: # NM_001128425.1(MUTYH):c.349-?_*(1_?)del # so save it for when we have a real HGVS parser. # Mutalyzer did not like NM_000478.5(ALPL):c.330C= # It parsed NM_000478.5(ALPL):c.330C>C but it doesn't have the .5 version # It accepted NM_000478.4(ALPL):c.330C>C and fortunately the coords match hgvsTester output... chr1 21563141 21563142 NM_000478.5(ALPL):c.330C= 0 + # For p. terms I manually used the mutalyzer "Back Translator" to get NM_:c. terms, # and then used the Position Converter to get hg38 positions. chr1 8361232 8361235 NP_036234.3:p.Thr758Serfs 0 - chr1 155240657 155240660 NP_001005741.1:p.Leu29Alafs*18 0 - chr1 9262270 9262273 NP_004276.2:p.Val320= 0 + chr1 11801329 11801335 NP_005948.3:p.Pro101_Gly102insLeuTyrIleAspValThrTrpHisProAlaGlyAspPro 0 - chr1 32806523 32806535 NP_003671.1:p.Val153_Val156del 0 - chr1 11966259 11966262 NP_000293.2:p.Glu532del 0 + chr1 40092118 40092136 NP_000301.1:p.Gln91_Cys96delinsPhe 0 - # Note: the following three actually have .10 (hg19) in ClinVar, edited to .11 here. chr1 11850845 11867218 NC_000001.11:g.11850846_11867218dup16373 0 + chr1 16782350 17359598 NC_000001.11:g.16782351_17359598del577248 0 + chr1 35570363 35656664 NC_000001.11:g.35570364_35656664dup86301 0 + # Mutalyzer could not parse this one: # NC_000001.11:g.(?_42925375)_(42959176_?)del #chr1 42925374 42959176 NC_000001.11:g.(?_42925375)_(42959176_?)del # Sometimes they only use a gene symbol -- Mutalyzer requires that to be converted to versioned NM_: chr13 113731320 113731321 GRK1:c.1172C>A 0 + chr16 15798698 15798701 MYH11:c.503-14_503-12del 0 - chr17 49404262 49404263 PHB1:c.*729C>T 0 -