#!/usr/bin/env ruby
require 'rubygems'
require 'fileutils'

# Run the dbSNP-to-HGVS conversion for one VCF file.
#
# The result is written next to the input, at "<vcf_file>.hgvs". A progress
# message is printed to standard error before the conversion starts.
#
# @param vcf_file [String] path of the dbSNP VCF file to convert
# @return [String] path of the HGVS output file ("<vcf_file>.hgvs")
def dbsnp_convert_to_hgvs(vcf_file)
    hgvs_path = "#{vcf_file}.hgvs"

    $stderr.puts(__FILE__, __method__, "STATUS", "Process dbSNP VCF file with hgvs_from_dbsnp utility")

    # The converter's status code is not inspected here; the output path is
    # returned whether or not the conversion succeeded.
    hgvs_from_dbsnp(vcf_file, hgvs_path)
    hgvs_path
end

# Convert the records of a dbSNP VCF file into HGVS genomic names.
#
# Lines starting with "#" are skipped. Every other line is split on tabs into
# sequence name, position, rs ID, reference allele and a comma-separated list
# of alternate alleles. One output line, "<hgvs>\t<numeric rs ID>\n", is
# written per alternate allele.
#
# Input whose name ends in ".gz" is decompressed by piping it through the
# external `gzip -dc` command.
#
# Any StandardError (unreadable file, malformed record, rs ID not matching
# /^rs\d+$/, ...) is reported on standard error and turned into a -1 return
# value; output written before the failure is left in place.
#
# @param filename [String] path of the VCF file (plain text or ".gz")
# @param outfile [String] path of the file to write HGVS names to
# @return [Integer] 0 on success, -1 if the conversion failed
def hgvs_from_dbsnp(filename, outfile)
    fstr = nil
    ostr = nil
    begin
        # The array form of popen runs gzip directly instead of through a
        # shell, so spaces or metacharacters in the file name are harmless.
        fstr = filename =~ /\.gz$/ ? IO.popen(['gzip', '-dc', filename]) : File.open(filename)
        ostr = File.open(outfile, 'w')

        fstr.each_line do |line|
            next if line =~ /^#/

            refseq, pos, rs, ref, alts = line.strip.split("\t")
            raise "Incorrect rs ID: #{rs}" if rs !~ /^rs\d+$/

            rs_number = rs[2..-1]
            alts.split(',').each do |alt|
                ostr.write("#{hgvs_variant_name(refseq, pos, ref, alt)}\t#{rs_number}\n")
            end
        end

        0
    rescue StandardError => err
        $stderr.puts(__FILE__, __method__, "hgvs_from_dbsnp function ERROR", "#{err}")
        -1
    ensure
        # Release both handles on every path, including failures mid-stream.
        fstr.close if fstr && !fstr.closed?
        ostr.close if ostr && !ostr.closed?
    end
end

# Build the HGVS genomic name for a single REF/ALT allele pair.
#
# Trailing bases shared by both alleles are trimmed first, but each allele
# always keeps at least one base. The insertion and pure-deletion forms are
# therefore only produced when an allele is already empty in the input.
#
# @param refseq [String] reference sequence name
# @param pos [String] 1-based position as it appears in the VCF record
# @param ref [String] reference allele
# @param alt [String] alternate allele
# @return [String] the HGVS name, e.g. "NC_000001.10:g.100A>G"
def hgvs_variant_name(refseq, pos, ref, alt)
    # Work on copies so the caller's strings are never mutated by chop!.
    ref = ref.dup
    alt = alt.dup
    while ref[-1] == alt[-1] && ref.size > 1 && alt.size > 1
        ref.chop!
        alt.chop!
    end

    start = pos.to_i
    last = (start + ref.size - 1).to_s
    hgvs = "#{refseq}:g.#{pos}"

    if ref.size == 0
        # insertion
        hgvs << "_" << (start + 1).to_s << "ins" << alt
    elsif ref.size == 1 && alt.size == 1
        # SNV
        hgvs << ref << ">" << alt
    elsif alt.size == 0 && ref.size == 1
        # deletion of single bp
        hgvs << "del"
    elsif alt.size == 0
        # deletion of several bp
        hgvs << "_" << last << "del"
    elsif ref.size == 1
        # indel replacing a single bp
        hgvs << "delins" << alt
    else
        # indel replacing several bp
        hgvs << "_" << last << "delins" << alt
    end
    hgvs
end

# Script entry point: convert the VCF file named by the first command-line argument.
dbsnp_convert_to_hgvs(ARGV.first)