#!/bin/bash
#SBATCH -t 4:00:00
#SBATCH --nodes=1 --ntasks-per-node=1
#SBATCH --mem=24g
#SBATCH --export=NONE
#SBATCH --mail-user=alex_labossiere@uri.edu
#SBATCH --mail-type=BEGIN,END,FAIL

# This script is a way to make the data more uniform. You must look at the summary table
# it makes to figure out what numbers you need, depending on what you're doing.

module load QIIME2/2024.2


rawdir=/data/mramseylab/16S/proc_reads/2022_AL_SUPP/2022
filtdir=/data/mramseylab/16S/proc_reads/2022_AL_SUPP/2022
visdir=/data/mramseylab/16S/visualizations/AL_2022
metadir=/data/mramseylab/16S/metadata/plaque_meta_2.tsv

SAMPLEFREQ=1500 # min amount of features you want within samples (smallest amount of total squence counts of samples)
SAMPLEFEAT=2   # min frequency of features that appear within samples (filters features across all samples lower than that number  )
MINSAMP=2       # min frequency of a freature found total (filters out feature seen only if in that many samples )
MINFEAT=10      # min times feature seen within sample  (filters out samples containing that amount of features)


# Step 1: keep only the samples whose metadata [subject] column is 'SubjectA'.
# Reads  $rawdir/table-single-2022.qza
# Writes $rawdir/subject-A-ONLY-filtered.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table and the batch job is
# not reported as successful.
if qiime feature-table filter-samples \
  --i-table "$rawdir/table-single-2022.qza" \
  --m-metadata-file "$metadir" \
  --p-where "[subject]='SubjectA'" \
  --o-filtered-table "$rawdir/subject-A-ONLY-filtered.qza"; then
  echo "subject filtered"
else
  echo "ERROR: filtering samples by subject failed" >&2
  exit 1
fi

# Step 2: from the subject-filtered table, keep only the samples whose metadata
# [environmental-setting] column is 'hydroxyapatite' or 'insitu'.
# Reads  $rawdir/subject-A-ONLY-filtered.qza
# Writes $rawdir/HA-filt.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table.
if qiime feature-table filter-samples \
  --i-table "$rawdir/subject-A-ONLY-filtered.qza" \
  --m-metadata-file "$metadir" \
  --p-where "[environmental-setting]='hydroxyapatite' OR [environmental-setting]='insitu' " \
  --o-filtered-table "$rawdir/HA-filt.qza"; then
  # The original message here was a copy of the subject step's message.
  echo "environmental setting filtered (hydroxyapatite or insitu)"
else
  echo "ERROR: filtering samples by environmental-setting failed" >&2
  exit 1
fi


# Step 3: from the environmental-setting-filtered table, keep only the samples
# whose [media-types] column is 'Saliva' or whose [environmental-setting]
# column is 'insitu'.
# Reads  $rawdir/HA-filt.qza
# Writes $rawdir/A-HA-Saliva-table.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table.
if qiime feature-table filter-samples \
  --i-table "$rawdir/HA-filt.qza" \
  --m-metadata-file "$metadir" \
  --p-where "[media-types]='Saliva' OR [environmental-setting]='insitu'" \
  --o-filtered-table "$rawdir/A-HA-Saliva-table.qza"; then
  # The original message here was a copy of the subject step's message.
  echo "media type filtered (Saliva or insitu)"
else
  echo "ERROR: filtering samples by media-types failed" >&2
  exit 1
fi

# Step 4: drop samples whose total frequency is below $SAMPLEFREQ
# (filter-samples --p-min-frequency).
# Reads  $rawdir/A-HA-Saliva-table.qza
# Writes $filtdir/freq-filtered-table.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table.
if qiime feature-table filter-samples \
  --i-table "$rawdir/A-HA-Saliva-table.qza" \
  --p-min-frequency "$SAMPLEFREQ" \
  --o-filtered-table "$filtdir/freq-filtered-table.qza"; then
  # Message corrected: this threshold is a total frequency, not a feature count.
  echo "samples with a total frequency less than $SAMPLEFREQ are now filtered out"
else
  echo "ERROR: filtering samples by minimum frequency ($SAMPLEFREQ) failed" >&2
  exit 1
fi

# Step 5: drop features whose total frequency is below $SAMPLEFEAT
# (filter-features --p-min-frequency).
# Reads  $filtdir/freq-filtered-table.qza
# Writes $filtdir/AGAIN-freq-filtered-table.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table.
if qiime feature-table filter-features \
  --i-table "$filtdir/freq-filtered-table.qza" \
  --p-min-frequency "$SAMPLEFEAT" \
  --o-filtered-table "$filtdir/AGAIN-freq-filtered-table.qza"; then
  echo "features filtered. total frequency of features is less than $SAMPLEFEAT"
else
  echo "ERROR: filtering features by minimum frequency ($SAMPLEFEAT) failed" >&2
  exit 1
fi

# Step 6: drop features observed in fewer than $MINSAMP samples
# (filter-features --p-min-samples).
# Reads  $filtdir/AGAIN-freq-filtered-table.qza
# Writes $filtdir/conting-filtered-table.qza
# A failure aborts the job with a non-zero status so that later steps never run
# against a missing (or stale, from an earlier run) table.
if qiime feature-table filter-features \
  --i-table "$filtdir/AGAIN-freq-filtered-table.qza" \
  --p-min-samples "$MINSAMP" \
  --o-filtered-table "$filtdir/conting-filtered-table.qza"; then
  # Message corrected: features seen in exactly $MINSAMP samples are kept.
  echo "features observed in fewer than $MINSAMP samples are now filtered out"
else
  echo "ERROR: filtering features by minimum samples ($MINSAMP) failed" >&2
  exit 1
fi

# Step 7: drop samples that contain fewer than $MINFEAT features
# (filter-samples --p-min-features). This produces the final filtered table.
# Reads  $filtdir/conting-filtered-table.qza
# Writes $filtdir/filter-final-single.qza
# A failure aborts the job with a non-zero status so that the job is not
# reported as successful without its final table.
if qiime feature-table filter-samples \
  --i-table "$filtdir/conting-filtered-table.qza" \
  --p-min-features "$MINFEAT" \
  --o-filtered-table "$filtdir/filter-final-single.qza"; then
  # Message corrected: this step removes samples, not features.
  echo "samples containing fewer than $MINFEAT features are now filtered out"
else
  echo "ERROR: filtering samples by minimum features ($MINFEAT) failed" >&2
  exit 1
fi

#visualization

# Summarize the final filtered table into a .qzv visualization.
# Reads  $filtdir/filter-final-single.qza and the sample metadata file
# Writes $visdir/summary-table-feature.qzv
# The metadata option previously expanded $metadata, which is never set in this
# script; the metadata file path is held in $metadir.
# A failure aborts the job with a non-zero status so it is not reported as
# successful without the summary.
if qiime feature-table summarize \
  --i-table "$filtdir/filter-final-single.qza" \
  --o-visualization "$visdir/summary-table-feature.qzv" \
  --m-sample-metadata-file "$metadir"; then
  echo "visluzation of filtered data made"
else
  echo "ERROR: summarizing the filtered table failed" >&2
  exit 1
fi


# Final check: confirm the final filtered table exists before telling the user
# to move on. The file name must match the output of the last filtering step
# (filter-final-single.qza); the previous check looked for
# "filtered-final-single.qza", which this script never creates.
# If the table is missing, fail the job instead of finishing silently.
if [[ -e "$filtdir/filter-final-single.qza" ]]; then
  echo "Now use this file for the alpha and beta scripts, using youre known sequencing depth from earlier"
else
  echo "ERROR: expected output $filtdir/filter-final-single.qza was not created" >&2
  exit 1
fi
