#!/bin/bash
# 03_postprocess_vcf.sh
# Post-processing and tagging of somatic variants

# Columns:
# $1-11: standard VCF
# $12: sample name
# $13: mutation ID (Chr_POS_REF>ALT)
# $14: mutation type (snv/indel)
# $15: count of same mutation ID across samples
# $16: PASS tag if variant is tumor-specific

# Header
# Start integrated.vcf with a single column-header line: the contents of
# $header followed by the names of the three columns this script appends
# (type, count, passALL).
# NOTE(review): $header is not assigned anywhere in this script; presumably it
# is provided by the calling environment. Confirm against the caller.
additional_header="\ttype\tcount\tpassALL"
# Concatenate with no separator: additional_header already starts with a tab,
# and an extra space here would leave a trailing blank on the last column name
# taken from $header. %b expands backslash escapes (e.g. \t) like `echo -e`.
printf '%b\n' "${header}${additional_header}" > integrated.vcf

# Mutation type annotation
# Copy every line of tmp4.vcf that does not start with "#" into tmp5.vcf,
# adding one tab-separated trailing column: "snv" when the lengths of REF ($4)
# and ALT ($5) add up to 2, "indel" for anything else.
awk -v OFS='\t' '
  /^#/ { next }   # header and meta lines are not carried over
  {
    kind = (length($4) + length($5) == 2) ? "snv" : "indel"
    print $0, kind
  }
' tmp4.vcf > tmp5.vcf

# Count mutation ID
# Read tmp5.vcf twice: the first pass (NR == FNR) tallies how many rows carry
# each mutation ID ($13); the second pass re-emits every row with that tally
# appended as an extra tab-separated column.
# The output is truncated (">") rather than appended to, so rows left in
# tmp6.vcf by an earlier run are not duplicated into later steps.
awk 'BEGIN {OFS="\t"} NR == FNR { count[$13]++; next } {print $0, count[$13]}' tmp5.vcf tmp5.vcf > tmp6.vcf

# Tag tumor-specific variants
# For every row of tmp6.vcf, set column 16 to "PASS" when all of the following
# hold, and to "." otherwise:
#   - the chromosome name ($1) contains neither "M" nor "Y"
#   - the ID column ($3) is "."
#   - the FILTER column ($7) is "PASS"
#   - the mutation-ID count ($15) equals 1
# Rows are appended to integrated.vcf; assigning $16 makes awk re-join the
# fields of each row with tabs.
awk -v OFS='\t' '
  {
    tumor_specific = ($1 !~ /[MY]/ && $3 == "." && $7 == "PASS" && $15 == 1)
    $16 = tumor_specific ? "PASS" : "."
    print
  }
' tmp6.vcf >> integrated.vcf
