#!/bin/bash
# Wall-clock time limit for the job.
#SBATCH -t 4:00:00
# NOTE(review): two nodes with two tasks each are requested, but the commands
# in this script are started directly (no srun/mpirun) — confirm the second
# node is actually used.
#SBATCH --nodes=2 --ntasks-per-node=2
# Memory request for the job.
#SBATCH --mem=48g
# Do not carry the submitting shell's environment into the job.
#SBATCH --export=NONE
# E-mail notifications when the job begins, ends, or fails.
#SBATCH --mail-user=alex_labossiere@uri.edu
#SBATCH --mail-type=BEGIN,END,FAIL

# Abort on the first failing command (and on failures inside pipelines) so
# later QIIME 2 steps never run against missing or stale artifacts.
set -eo pipefail

# Make the QIIME 2 command-line tools available in this job.
module load QIIME2/2019.7

# Treat unset variables as errors from here on. Enabled only after
# `module load` because module initialization code may reference unset
# variables.
set -u

# Path shortcuts used by the QIIME 2 commands in this script.
readonly rawdir=/data/mramseylab/raw_reads/2023_SUPP_V1V2
readonly visdir=/data/mramseylab/visualizations/AL_2023
readonly metadata=/data/mramseylab/metadata/plaque_meta_5.tsv

# The lines above load the QIIME 2 module and set path shortcuts so the run is easy to reproduce.
# Later in this script, `feature-table summarize` reports how many sequences are associated with each sample; it includes useful graphs and summary statistics.
# Also later, `feature-table tabulate-seqs` maps feature IDs to sequences and lets you BLAST them against NCBI. These outputs are used later on.

  # Denoise the demultiplexed reads with DADA2 (single-read mode), writing the
  # feature table, representative sequences and denoising statistics into the
  # same directory as the input artifact.
  # Paths are quoted so they survive word splitting and globbing.
  # NOTE(review): no --p-n-threads is passed, so the plugin's default thread
  # count applies — confirm this matches the resources requested from Slurm.
  qiime dada2 denoise-single \
    --i-demultiplexed-seqs "${rawdir}/demux-v1v2-master.qza" \
    --p-trim-left 6 \
    --p-trunc-len 275 \
    --o-table "${rawdir}/denoise-table-v1v2.qza" \
    --o-representative-sequences "${rawdir}/rep-seqs-v1v2.qza" \
    --o-denoising-stats "${rawdir}/denoising-stats-v1v2.qza"

# Single-read denoising was chosen over paired (double) denoising because of the reads lost with the latter; single reads allow truncation to an appropriate quality without throwing away too many samples.
# Input is the .qza from the previous script (input.sh); output is more .qza artifacts.
# The output is ASVs that are denoised, i.e. "cleaner".


# `feature-table summarize` reports how many sequences are associated with each sample; it includes useful graphs and summary statistics.
# `feature-table tabulate-seqs` maps feature IDs to sequences and lets you BLAST them against NCBI. These outputs are used later on.
# This will likely be the slowest script to run.

# Summarize the denoised feature table together with the sample metadata and
# write the result as a visualization.
# The final option deliberately has no trailing backslash: a dangling line
# continuation would pull whatever line follows it into this command.
qiime feature-table summarize \
  --i-table "${rawdir}/denoise-table-v1v2.qza" \
  --o-visualization "${visdir}/master-table-sum-v1v2.qzv" \
  --m-sample-metadata-file "${metadata}"

# Tabulate the representative sequences into a visualization.
# Paths are quoted so they survive word splitting and globbing.
qiime feature-table tabulate-seqs \
  --i-data "${rawdir}/rep-seqs-v1v2.qza" \
  --o-visualization "${visdir}/rep-seqs-v1v2.qzv"

# Next step: phylogeny.
# The QIIME 2 2021.11 code is shorter and gives us the same artifacts (note: this script loads QIIME2/2019.7).

# Build the phylogeny from the representative sequences in one pipeline call,
# producing the alignment, the masked alignment, and both the unrooted and
# rooted trees.
# Paths are quoted so they survive word splitting and globbing.
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences "${rawdir}/rep-seqs-v1v2.qza" \
  --o-alignment "${rawdir}/aligned-master-rep-seqs-v1v2.qza" \
  --o-masked-alignment "${rawdir}/masked-aligned-master-rep-seqs-v1v2.qza" \
  --o-tree "${rawdir}/unrooted-tree-master-v1v2.qza" \
  --o-rooted-tree "${rawdir}/rooted-tree-master-v1v2.qza"
# Now we use a command that takes the feature IDs and sequences and builds the tree phylogeny needed for the core diversity metrics.
# It produces an unrooted and a rooted tree. These are crucial for downstream work, so keep all of these output files somewhere they won't easily get lost.