#!/bin/bash
#SBATCH -t 4:00:00
#SBATCH --nodes=1 --ntasks-per-node=1
#SBATCH --mem=24g
#SBATCH --export=NONE
#SBATCH --mail-user=alex_labossiere@uri.edu
#SBATCH --mail-type=BEGIN,END,FAIL

# This script is a way to make the data more uniform. You must look at the summary table
# that was made to figure out what numbers you need, depending on what you're doing.

# Load the QIIME 2 release that provides the `qiime` command used below.
module load QIIME2/2024.2


# Input / output locations.
rawdir=/data/mramseylab/raw_reads/2024_AL_SUPP                          # holds pseudo-denoise-table-2024.qza
filtdir=/data/mramseylab/16S/proc_reads/2024_AL_SUPP_6000/FS5BS5        # every filtered .qza is written here
visdir=/data/mramseylab/16S/visualizations/AL_2024/FS5BS5-filtered     # .qzv summaries are written here
metadir=/data/mramseylab/16S/metadata/meta_BSFS.tsv                     # NOTE: this is the metadata TSV file itself, not a directory

# Every step below writes into these directories; create them up front so a
# fresh project layout does not make the first qiime call fail.
if ! mkdir -p -- "$filtdir" "$visdir"; then
  echo "ERROR: could not create output directories $filtdir and/or $visdir" >&2
  exit 1
fi

# Filtering thresholds, listed in the order the pipeline applies them.
SAMPLEFREQ=1500 # filter-samples  --p-min-frequency: min total sequence count a sample must have to be kept
SAMPLEFEAT=4    # filter-features --p-min-frequency: min total count (summed over all samples) a feature must have to be kept
MINSAMP=2       # filter-features --p-min-samples:   min number of samples a feature must be observed in to be kept
MINFEAT=2       # filter-samples  --p-min-features:  min number of distinct features a sample must contain to be kept


# Metadata filter 1: keep only samples whose `variable-type` metadata column
# equals 'expirmental'. The value is matched literally against the metadata
# file, so its spelling here must stay identical to the spelling used there.
# A failure stops the job: every later step consumes this step's output, and
# continuing would either fail repeatedly or silently reuse a stale artifact
# left behind by a previous run.
if ! qiime feature-table filter-samples \
  --i-table "$rawdir/pseudo-denoise-table-2024.qza" \
  --m-metadata-file "$metadir" \
  --p-where "[variable-type]='expirmental'" \
  --o-filtered-table "$filtdir/expirmental-samples-table.qza"; then
  echo "ERROR: filtering samples by variable-type failed" >&2
  exit 1
fi
echo "subject filtered"

# Metadata filter 2: of those samples, keep only the FS5 and BS5 groups.
if ! qiime feature-table filter-samples \
  --i-table "$filtdir/expirmental-samples-table.qza" \
  --m-metadata-file "$metadir" \
  --p-where "[group] IN ('FS5', 'BS5')" \
  --o-filtered-table "$filtdir/FS5-BS5.qza"; then
  echo "ERROR: filtering samples by group (FS5/BS5) failed" >&2
  exit 1
fi
echo "subject filtered"

# Threshold filters. Each step reads the previous step's output, so any
# failure aborts the job instead of letting later steps run on a missing or
# stale table (and so SLURM reports the job as failed).

# 1) Drop samples whose total sequence count is below $SAMPLEFREQ.
if ! qiime feature-table filter-samples \
  --i-table "$filtdir/FS5-BS5.qza" \
  --p-min-frequency "$SAMPLEFREQ" \
  --o-filtered-table "$filtdir/frequency-filtered-table.qza"; then
  echo "ERROR: filter-samples --p-min-frequency $SAMPLEFREQ failed" >&2
  exit 1
fi
echo "samples with a total frequency less than $SAMPLEFREQ are now filtered"

# 2) Drop features whose total count across all samples is below $SAMPLEFEAT.
if ! qiime feature-table filter-features \
  --i-table "$filtdir/frequency-filtered-table.qza" \
  --p-min-frequency "$SAMPLEFEAT" \
  --o-filtered-table "$filtdir/AGAIN-frequency-filtered-table.qza"; then
  echo "ERROR: filter-features --p-min-frequency $SAMPLEFEAT failed" >&2
  exit 1
fi
echo "features filtered. total frequency of features is less than $SAMPLEFEAT"

# 3) Drop features observed in fewer than $MINSAMP samples.
if ! qiime feature-table filter-features \
  --i-table "$filtdir/AGAIN-frequency-filtered-table.qza" \
  --p-min-samples "$MINSAMP" \
  --o-filtered-table "$filtdir/contingency-filtered-table.qza"; then
  echo "ERROR: filter-features --p-min-samples $MINSAMP failed" >&2
  exit 1
fi
echo "features filtered out if found in fewer than $MINSAMP samples"

# 4) Drop samples left with fewer than $MINFEAT distinct features. This
#    produces the final table, filtered-reads.qza.
if ! qiime feature-table filter-samples \
  --i-table "$filtdir/contingency-filtered-table.qza" \
  --p-min-features "$MINFEAT" \
  --o-filtered-table "$filtdir/filtered-reads.qza"; then
  echo "ERROR: filter-samples --p-min-features $MINFEAT failed" >&2
  exit 1
fi
echo "samples containing fewer than $MINFEAT features are now filtered out"

#visualization

# Summarize the final filtered table into a .qzv for inspection.
# The sample metadata path lives in $metadir (the metadata TSV file); the
# previously used $metadata was never defined, which left the option without
# an argument.
if qiime feature-table summarize \
  --i-table "$filtdir/filtered-reads.qza" \
  --o-visualization "$visdir/filtered-reads-vis.qzv" \
  --m-sample-metadata-file "$metadir"; then
  echo "visluzation of filtered data made"
else
  echo "ERROR: could not summarize $filtdir/filtered-reads.qza" >&2
  exit 1
fi


# Final sanity check: the last filtering step writes filtered-reads.qza, so
# that is the file whose presence indicates the pipeline produced its output.
if [[ -e "$filtdir/filtered-reads.qza" ]]; then
  echo "Now use this file for the alpha and beta scripts, using youre known sequencing depth from earlier"
else
  echo "ERROR: expected output $filtdir/filtered-reads.qza was not found" >&2
  exit 1
fi


