#!/usr/bin/env ruby
require 'rubygems'
require 'fileutils'
# Convert a dbSNP VCF file to HGVS expressions via hgvs_from_dbsnp.
#
# The output is written to "<vcf_file>.hgvs". A status message is printed to
# standard error before the conversion starts.
#
# @param vcf_file [String] path of the dbSNP VCF file to convert
# @return [String] path of the HGVS output file; the status code returned by
#   hgvs_from_dbsnp is not inspected, so the path is returned regardless of
#   whether the conversion succeeded
def dbsnp_convert_to_hgvs(vcf_file)
  hgvs_path = "#{vcf_file}.hgvs"
  $stderr.puts(__FILE__, __method__, "STATUS", "Process dbSNP VCF file with hgvs_from_dbsnp utility")
  hgvs_from_dbsnp(vcf_file, hgvs_path)
  hgvs_path
end
# Build the HGVS genomic expression for one REF/ALT pair of a VCF record.
#
# Bases shared at the end of both alleles are trimmed first (always leaving at
# least one base on each side); the variant is then classified by the lengths
# of the remaining alleles.
#
# @param refseq [String] reference sequence name (first VCF column)
# @param pos [String] position exactly as read from the second VCF column
# @param ref [String] reference allele (fourth VCF column)
# @param alt [String] one alternate allele (one comma-separated item of the
#   fifth VCF column)
# @return [String] expression of the form "<refseq>:g.<pos>..."
def dbsnp_allele_to_hgvs(refseq, pos, ref, alt)
  # Work on copies so the caller's strings are never mutated.
  ref = ref.dup
  alt = alt.dup
  while ref.size > 1 && alt.size > 1 && ref[-1] == alt[-1]
    ref.chop!
    alt.chop!
  end

  start_pos = pos.to_i
  end_pos = start_pos + ref.size - 1
  change =
    if ref.empty?
      # insertion
      "_#{start_pos + 1}ins#{alt}"
    elsif ref.size == 1 && alt.size == 1
      # SNV
      "#{ref}>#{alt}"
    elsif alt.empty?
      # deletion of a single bp, or of a range
      ref.size == 1 ? "del" : "_#{end_pos}del"
    elsif ref.size == 1
      # indel replacing a single bp
      "delins#{alt}"
    else
      # indel replacing a range
      "_#{end_pos}delins#{alt}"
    end

  "#{refseq}:g.#{pos}#{change}"
end

# Convert a dbSNP VCF file into a tab-separated file of HGVS expressions.
#
# Lines starting with "#" are skipped. Every other line is split on tabs into
# reference sequence, position, rs ID, reference allele and a comma-separated
# list of alternate alleles. One output line "<hgvs>\t<rs number>" is written
# per alternate allele, where the rs number is the rs ID without its "rs"
# prefix.
#
# Input whose name ends in ".gz" is decompressed through an external
# "gzip -dc" process; the file name is passed as a separate argument so it is
# never interpreted by a shell.
#
# Failures (unreadable input, an rs ID not matching /^rs\d+$/, a malformed
# record, a failing gzip process) are reported on standard error and turned
# into a -1 status instead of being raised. Output written before the failure
# is left in place.
#
# @param filename [String] path of the VCF file, optionally gzip-compressed
# @param outfile [String] path of the file to write
# @return [Integer] 0 on success, -1 on failure
def hgvs_from_dbsnp(filename, outfile)
  status = -1
  fstr = nil
  ostr = nil
  begin
    gzipped = filename =~ /\.gz$/
    fstr = gzipped ? IO.popen(["gzip", "-dc", filename]) : File.open(filename)
    ostr = File.open(outfile, 'w')

    fstr.each_line do |line|
      next if line =~ /^#/

      refseq, pos, rs, ref, alts = line.strip.split("\t")
      raise "Incorrect rs ID: #{rs}" if rs !~ /^rs\d+$/

      rs_number = rs[2..-1]
      alts.split(',').each do |alt|
        ostr.write("#{dbsnp_allele_to_hgvs(refseq, pos, ref, alt)}\t#{rs_number}\n")
      end
    end

    # Closing a popen stream waits for the child and sets $?, which lets a
    # failed decompression be reported instead of passing as empty input.
    fstr.close
    raise "gzip exited with status #{$?.exitstatus} while reading #{filename}" if gzipped && !$?.success?

    status = 0
  rescue StandardError => err
    status = -1
    $stderr.puts(__FILE__, __method__, "hgvs_from_dbsnp function ERROR", "#{err}")
  ensure
    # Release both handles even when the conversion stopped half-way.
    fstr.close if fstr && !fstr.closed?
    ostr.close if ostr && !ostr.closed?
  end
  status
end
# Script entry point: convert the VCF file named by the first command-line
# argument.
dbsnp_convert_to_hgvs(ARGV.first)