#!/bin/bash
#SBATCH -t 4:00:00
#SBATCH --nodes=1 --ntasks-per-node=1
#SBATCH --mem=288g
#SBATCH --export=NONE
#SBATCH --mail-user=alex_labossiere@uri.edu
#SBATCH --mail-type=BEGIN,END,FAIL

# NOTE: the BEGIN/END/FAIL mails are sent by SLURM itself (see --mail-type in
# the job header); this line only marks the start of the job in the log.
echo "emails sent"

# Load the QIIME 2 environment. Stop right away if the load fails or `qiime`
# is still not on PATH: every later step calls `qiime`, so continuing would
# only produce a cascade of "command not found" errors.
if ! module load QIIME2/2024.2 || ! command -v qiime >/dev/null 2>&1; then
    echo "ERROR: could not load module QIIME2/2024.2 (qiime not found)" >&2
    exit 1
fi


# Step 1: run `rescript cull-seqs` (default parameters) on HOMD_16S_otu.qza,
# writing HOMD_16S_otu-cleaned.qza.
#
# Abort on failure: the next step reads this step's output, and exiting
# non-zero lets SLURM mark the job as failed instead of completed.
if ! qiime rescript cull-seqs \
    --i-sequences HOMD_16S_otu.qza \
    --o-clean-sequences HOMD_16S_otu-cleaned.qza; then
    echo "ERROR: rescript cull-seqs failed" >&2
    exit 1
fi
echo "recript cull done"

# Step 2: length-filter the culled sequences per taxonomic label.
# --p-labels and --p-min-lens are parallel lists.
# NOTE(review): presumably paired positionally (Archaea>=900, Bacteria>=1200,
# Eukaryota>=1400) -- confirm against the RESCRIPt documentation.
# Sequences that pass go to HOMD_16S_otu-seqs-filt.qza, the rest to
# HOMD_16S_otu-seqs-discard.qza.
#
# Abort on failure so later steps never run against a missing input.
if ! qiime rescript filter-seqs-length-by-taxon \
    --i-sequences HOMD_16S_otu-cleaned.qza \
    --i-taxonomy ref_taxonomy.qza \
    --p-labels Archaea Bacteria Eukaryota \
    --p-min-lens 900 1200 1400 \
    --o-filtered-seqs HOMD_16S_otu-seqs-filt.qza \
    --o-discarded-seqs HOMD_16S_otu-seqs-discard.qza; then
    echo "ERROR: rescript filter-seqs-length-by-taxon failed" >&2
    exit 1
fi
echo "filter seq done"

# Step 3: dereplicate the length-filtered sequences together with their
# taxonomy, using mode 'uniq'.
# NOTE(review): the output file names say "lca" although the mode passed is
# 'uniq'; names are kept unchanged because later steps refer to them.
#
# Abort on failure so later steps never run against a missing input.
if ! qiime rescript dereplicate \
    --i-sequences HOMD_16S_otu-seqs-filt.qza \
    --i-taxa ref_taxonomy.qza \
    --p-mode 'uniq' \
    --o-dereplicated-sequences HOMD_16S_otu-seqs-filt-derep-lca.qza \
    --o-dereplicated-taxa ref_taxonomy-derep-lca.qza; then
    echo "ERROR: rescript dereplicate failed" >&2
    exit 1
fi
echo "dereplicated"

# Step 4: extract the amplicon region bounded by the given forward/reverse
# primers from the dereplicated reference sequences, writing
# HOMD-seqs-v1-v2.qza.
# NOTE(review): --p-n-jobs 2 is requested while the job header asks for a
# single task; confirm the allocation actually provides 2 CPUs
# (e.g. via --cpus-per-task=2).
#
# Abort on failure so the job exits non-zero instead of carrying on.
if ! qiime feature-classifier extract-reads \
    --i-sequences HOMD_16S_otu-seqs-filt-derep-lca.qza \
    --p-f-primer AGAGTTTGATYMTGGCTCAG \
    --p-r-primer TGCTGCCTCCCGTAGRAGT \
    --p-n-jobs 2 \
    --p-read-orientation 'forward' \
    --o-reads HOMD-seqs-v1-v2.qza; then
    echo "ERROR: feature-classifier extract-reads failed" >&2
    exit 1
fi
echo "primers aligned"

# Step 5: dereplicate the primer-extracted reads.
#
# Previously this step re-ran the exact dereplication of the full-length
# sequences (same inputs, same outputs), so the extracted reads in
# HOMD-seqs-v1-v2.qza were never used. Extraction can make sequences that
# differed only outside the amplicon identical, so the extracted reads are
# dereplicated here, paired with the taxonomy from the first dereplication.
# Results go to new files so the full-length artifacts are not overwritten.
if ! qiime rescript dereplicate \
    --i-sequences HOMD-seqs-v1-v2.qza \
    --i-taxa ref_taxonomy-derep-lca.qza \
    --p-mode 'uniq' \
    --o-dereplicated-sequences HOMD-seqs-v1-v2-derep-uniq.qza \
    --o-dereplicated-taxa ref_taxonomy-v1-v2-derep-uniq.qza; then
    echo "ERROR: rescript dereplicate (extracted reads) failed" >&2
    exit 1
fi
echo "dereplicated again"

# Step 6: train the naive Bayes classifier on the region-specific,
# dereplicated reads and their matching taxonomy. The classifier file name
# is unchanged so downstream jobs keep working.
#
# Abort on failure so SLURM reports the job as failed rather than completed.
if ! qiime feature-classifier fit-classifier-naive-bayes \
    --i-reference-reads HOMD-seqs-v1-v2-derep-uniq.qza \
    --i-reference-taxonomy ref_taxonomy-v1-v2-derep-uniq.qza \
    --o-classifier HOMD-classifier.qza; then
    echo "ERROR: feature-classifier fit-classifier-naive-bayes failed" >&2
    exit 1
fi
echo "classifier made"
