#!/usr/bin/env ruby
#
# Converts a dbSNP VCF file (plain or gzip-compressed) into a two-column,
# tab-separated listing: an HGVS-style genomic expression followed by the
# numeric part of the rs ID.
#
# Usage: <this script> <dbsnp.vcf[.gz]>

require 'rubygems'
require 'fileutils'

# Converts +vcf_file+ and writes the result next to it as "<vcf_file>.hgvs".
#
# @param vcf_file [String] path to the dbSNP VCF file (optionally ending in .gz)
# @return [String] path of the output file; it is returned even when the
#   conversion failed (the failure is reported on stderr by hgvs_from_dbsnp)
def dbsnp_convert_to_hgvs(vcf_file)
  out_file = "#{vcf_file}.hgvs"
  $stderr.puts(__FILE__, __method__, "STATUS", "Process dbSNP VCF file with hgvs_from_dbsnp utility")
  hgvs_from_dbsnp(vcf_file, out_file)
  out_file
end

# Reads the VCF at +filename+ and writes one "<hgvs>\t<rs number>" line to
# +outfile+ for every ALT allele of every non-header record.
#
# Conversion stops at the first record whose ID column is not of the form
# "rs<digits>" or that has no ALT column; output written up to that point
# stays in +outfile+.
#
# @param filename [String] input VCF path; a name ending in ".gz" is
#   decompressed through the external +gzip+ program
# @param outfile [String] path of the file to (over)write
# @return [Integer] 0 on success, -1 if any error occurred (the error message
#   is printed to stderr)
def hgvs_from_dbsnp(filename, outfile)
  # The input is opened before the output so that an unreadable plain-text
  # input does not create or truncate +outfile+.
  with_dbsnp_input(filename) do |input|
    File.open(outfile, 'w') do |output|
      input.each_line do |line|
        next if line.start_with?('#') # VCF meta-information and header lines

        refseq, pos, rs, ref, alts = line.strip.split("\t")
        raise "Incorrect rs ID: #{rs}" if rs !~ /^rs\d+$/
        raise "Missing ALT column for #{rs}" if alts.nil?

        rs_number = rs[2..-1] # drop the leading "rs"
        alts.split(',').each do |alt|
          output.write("#{dbsnp_hgvs_expression(refseq, pos, ref, alt)}\t#{rs_number}\n")
        end
      end
    end
  end
  0
rescue StandardError => err
  # StandardError (not Exception) so that Ctrl-C and exit still work.
  $stderr.puts(__FILE__, __method__, "hgvs_from_dbsnp function ERROR", "#{err}")
  -1
end

# Opens the VCF input and yields the readable IO; the handle is always closed
# when the block finishes, including when it raises.
def with_dbsnp_input(filename, &block)
  return File.open(filename, &block) unless filename.to_s.end_with?('.gz')

  # Array form runs gzip directly, without a shell, so spaces and shell
  # metacharacters in the file name are passed through literally.
  result = IO.popen(['gzip', '-dc', filename.to_s], &block)
  gzip_status = $?
  # gzip documents exit status 2 as "warning"; the data was still decompressed.
  unless gzip_status.success? || gzip_status.exitstatus == 2
    raise "gzip failed to decompress #{filename} (#{gzip_status})"
  end
  result
end

# Builds the HGVS-style expression for a single REF/ALT pair.
#
# Bases shared at the END of both alleles are trimmed first, always keeping at
# least one base on each side. A shared leading base is not trimmed, so
# anchored VCF indels such as A -> AT are reported as "delins".
# NOTE(review): because of that, the "ins" and "del" branches are reached only
# when the REF or ALT field is empty - confirm this is the intended output.
def dbsnp_hgvs_expression(refseq, pos, ref, alt)
  ref_allele = ref.dup
  alt_allele = alt.dup # trim copies; the caller's strings stay untouched
  while ref_allele[-1] == alt_allele[-1] && ref_allele.size > 1 && alt_allele.size > 1
    ref_allele.chop!
    alt_allele.chop!
  end

  range_end = (pos.to_i + ref_allele.size - 1).to_s
  hgvs = "#{refseq}:g.#{pos}"
  if ref_allele.empty? # insertion
    hgvs << "_" << (pos.to_i + 1).to_s << "ins" << alt_allele
  elsif ref_allele.size == 1 && alt_allele.size == 1 # SNV
    hgvs << ref_allele << ">" << alt_allele
  elsif alt_allele.empty? && ref_allele.size == 1 # deletion of single bp
    hgvs << "del"
  elsif alt_allele.empty? # deletion
    hgvs << "_" << range_end << "del"
  elsif ref_allele.size == 1 # indel with single bp
    hgvs << "delins" << alt_allele
  else # indel
    hgvs << "_" << range_end << "delins" << alt_allele
  end
  hgvs
end

if ARGV.empty?
  $stderr.puts("Usage: #{File.basename($0)} <dbsnp.vcf[.gz]>")
else
  dbsnp_convert_to_hgvs(ARGV[0])
end