#!/bin/bash

#Not relevant for home server
#module load Bowtie2/2.3.5.1-GCC-8.3.0
#module load HTSeq/0.11.2-foss-2019b-Python-3.7.4
#module load R-bundle-Bioconductor/3.10-foss-2019b

#Set this to the output directory you want to work on everything in MAKE THIS DIRECTORY BEFORE STARTING
work_dir=/home/breaky/mramseylab/RNAseq/2025_jasmer_hp/

#Set this to the fastq input file directory (OBSOLETE REMOVE THIS AND BELOW LINE IF v3 WORKS)
#fq_dir=/data/mramseylab/raw_reads/2024_valm/cmat_anaes/

#name of the input metadata file, must be created in specific way detailed in RNASeq protocol. 
filename=$work_dir'metadata.tsv'

genomedir=/home/breaky/mramseylab/genomes/
cmat1path=/home/breaky/mramseylab/genomes/cmat_atcc14266/
hpara1path=/home/breaky/mramseylab/genomes/hpara_392/
EL1path=/home/breaky/mramseylab/genomes/hpara_EL1/
anaespath=/home/breaky/mramseylab/genomes/a_naeslundii_atcc12104/ 
#Note add more paths and genomes to the above list

#comment out if rerunning analysis
#mkdir alignment
#mkdir figures
#mkdir kegg_analysis
#mkdir rnaseq_analysis

#This section scrapes the metadata file row by row, 
#each colum gets cut in order and used to assign variables 
#to the inputs for the rest of script
#while read line
#do
    #calls filename from column 1 of tsv to assign variable in the next function
#    fastq=$(echo "$line" | cut -f1)
    #gets filename prefix so it can assign to R1 and R2 bowtie paired end reads
#    PREFIX1="${fastq%R1.fastq.gz}"
    #gets informative name from tsv file column 2 to label htseq count output files for DeSeq2 script to run properly
#    category=$(echo "$line" | cut -f2) 
    #this defines genome prefix name from tsv column 3, used in below 'case' command to call up the right genome folder for alignment
#    index=$(echo "$line" | cut -f3)
    #gets the directory location for the fastqfile from tsv column 4
#    fq_dir=$(echo "$line" | cut -f4)
    #This pulls column 3 prefix data and assigns to the right genome folder
    #FOR each new genome add path to directory at top of script and add prefix for bowtie genome indexing below
#    case $index in
#        cmatr)
#            indexpath=$cmat1path
#            ;;
#        hpara)
#            indexpath=$hpara1path
#            ;;
#        EL1)
#            indexpath=$EL1path
#            ;;
#        anaes)
#            indexpath=$anaespath
#            ;;
#        *)
#            indexpath=$nope
#            echo "invalid genome prefix"
#            ;;
#    esac 


bowtie2 -x $indexpath"$index" -1 /mnt/user/data/mramseylabdata/raw_reads/2025_jasmer_Hp/hpara_mono_3_S298_L005_R/mnt/user/data/mramseylabdata/raw_reads/2025_jasmer_Hp/hpara_mono_3_S298_L005_R2.fastq.gz1.fastq.gz -2 -S $PREFIX1.sam
# for below line "-s reverse -t CDS" may not be correct
htseq-count -s reverse -t CDS -a 1 -m intersection-nonempty -i ID $PREFIX1.sam $index.gff > $category.txt

#done < "$filename"


#mv *.sam ./alignment    
Rscript deseq_tablesandkegg.R

# to run this in background type "nohup ./scriptname.sh &" without quotes or try "nohup ./yourscript > logfilename 2>&1 &"