#!/bin/bash
#SBATCH -t 4:00:00
#SBATCH --nodes=1 --ntasks-per-node=1
#SBATCH --mem=24g
#SBATCH --export=NONE
#SBATCH --mail-user=alex_labossiere@uri.edu
#SBATCH --mail-type=BEGIN,END,FAIL

# This script filters the feature table to make the data more uniform. You must look at the
# summary table that was made to figure out which numbers you need, depending on what you're doing.

# Load the QIIME 2 environment, then confirm that the `qiime` command is really
# available. Without this check a failed module load goes unnoticed: every
# `if qiime ...` step below would simply evaluate to false and the job would
# still finish with exit status 0.
module load QIIME2/2024.2
if ! command -v qiime >/dev/null 2>&1; then
  echo "error: 'qiime' not found after 'module load QIIME2/2024.2'" >&2
  exit 1
fi


# --- Paths used by the steps below -------------------------------------------
# rawdir:   directory holding the input table (pseudo-denoise-table-2024.qza)
# filtdir:  directory that receives every filtered table
# visdir:   directory that receives the summary visualization (.qzv)
# metadata: sample metadata file passed to `feature-table summarize`
rawdir="/data/mramseylab/raw_reads/2024_AL_SUPP"
filtdir="/data/mramseylab/16S/proc_reads/2024_AL_SUPP_6000/filteredsamples_2024"
visdir="/data/mramseylab/16S/visualizations/AL_2024"
metadata="/data/mramseylab/16S/metadata/meta_BSFS.tsv"

# --- Filtering thresholds ------------------------------------------------------
# Passed to `filter-samples --p-min-frequency`: minimum total frequency
# (summed sequence count) a sample must have to be kept.
SAMPLEFREQ="2500"

# Passed to `filter-features --p-min-frequency`: minimum total frequency
# (summed across all samples) a feature must have to be kept.
SAMPLEFEAT="24"

# Passed to `filter-features --p-min-samples`: minimum number of samples a
# feature must be observed in to be kept.
MINSAMP="2"

# Passed to `filter-samples --p-min-features`: minimum number of features a
# sample must contain to be kept.
MINFEAT="10"

# Step 1: drop low-depth samples.
# `filter-samples --p-min-frequency` keeps only samples whose total frequency
# (summed sequence count) is at least $SAMPLEFREQ.
# The output directory is created first so the write cannot fail just because
# the directory is missing. On failure the job aborts with a non-zero status so
# later steps never run against a missing or stale table.
if ! mkdir -p -- "$filtdir"; then
  echo "error: cannot create output directory $filtdir" >&2
  exit 1
fi

if qiime feature-table filter-samples \
  --i-table "$rawdir/pseudo-denoise-table-2024.qza" \
  --p-min-frequency "$SAMPLEFREQ" \
  --o-filtered-table "$filtdir/sample-frequency-filtered-table.qza"; then
  echo "samples with a total frequency below $SAMPLEFREQ are now filtered out"
else
  echo "error: filter-samples --p-min-frequency $SAMPLEFREQ failed" >&2
  exit 1
fi

# Step 2: drop rare features by abundance.
# `filter-features --p-min-frequency` keeps only features whose total frequency
# (summed across all samples) is at least $SAMPLEFEAT.
# On failure the job aborts with a non-zero status instead of continuing.
if qiime feature-table filter-features \
  --i-table "$filtdir/sample-frequency-filtered-table.qza" \
  --p-min-frequency "$SAMPLEFEAT" \
  --o-filtered-table "$filtdir/feature-frequency-filtered-table.qza"; then
  echo "features with a total frequency below $SAMPLEFEAT are now filtered out"
else
  echo "error: filter-features --p-min-frequency $SAMPLEFEAT failed" >&2
  exit 1
fi

# Step 3: drop features by prevalence.
# `filter-features --p-min-samples` keeps only features observed in at least
# $MINSAMP samples.
# On failure the job aborts with a non-zero status instead of continuing.
if qiime feature-table filter-features \
  --i-table "$filtdir/feature-frequency-filtered-table.qza" \
  --p-min-samples "$MINSAMP" \
  --o-filtered-table "$filtdir/sample-contingency-filtered-table.qza"; then
  echo "features found in fewer than $MINSAMP samples are now filtered out"
else
  echo "error: filter-features --p-min-samples $MINSAMP failed" >&2
  exit 1
fi

# Step 4: drop samples with too few features.
# `filter-samples --p-min-features` keeps only samples that contain at least
# $MINFEAT features. This writes the final table, filtered_samples.qza.
# On failure the job aborts with a non-zero status instead of continuing.
if qiime feature-table filter-samples \
  --i-table "$filtdir/sample-contingency-filtered-table.qza" \
  --p-min-features "$MINFEAT" \
  --o-filtered-table "$filtdir/filtered_samples.qza"; then
  echo "samples containing fewer than $MINFEAT features are now filtered out"
else
  echo "error: filter-samples --p-min-features $MINFEAT failed" >&2
  exit 1
fi

#visualization

# Summarize the final filtered table into a .qzv visualization, annotated with
# the sample metadata file.
# The visualization directory is created first so the write cannot fail just
# because the directory is missing. A failed summarize makes the job exit
# non-zero instead of being skipped silently.
if ! mkdir -p -- "$visdir"; then
  echo "error: cannot create visualization directory $visdir" >&2
  exit 1
fi

if qiime feature-table summarize \
  --i-table "$filtdir/filtered_samples.qza" \
  --o-visualization "$visdir/filteredtable.qzv" \
  --m-sample-metadata-file "$metadata"; then
  echo "visluzation of filtered data made"
else
  echo "error: feature-table summarize failed for $filtdir/filtered_samples.qza" >&2
  exit 1
fi


# Final sanity check: tell the user what to do next when the filtered table is
# present, and fail the job (non-zero exit) when it is not, so a missing result
# is not reported as a successful run.
# NOTE: this only tests that the file exists; a file left behind by an earlier
# run also passes this check.
if [[ -e "$filtdir/filtered_samples.qza" ]]; then
  echo "Now use this file for the alpha and beta scripts, using youre known sequencing depth from earlier"
else
  echo "error: expected output $filtdir/filtered_samples.qza was not found" >&2
  exit 1
fi
