#!/usr/bin/env bash
#
# Train a BPNet model for one cross-validation fold via `bpnet-train`.
#
# Usage:
#   <script> DATA_DIR MODEL_NAME FOLD [GPU]
#
# Arguments:
#   DATA_DIR    Accepted for positional compatibility; not referenced below.
#   MODEL_NAME  Selects input_data/<MODEL_NAME>_native/ and
#               models/<MODEL_NAME>_native/.
#   FOLD        Fold identifier; selects ../data/folds/fold_<FOLD>.json and
#               the models/<MODEL_NAME>_native/fold_<FOLD> output directory.
#   GPU         Value exported as CUDA_VISIBLE_DEVICES. If omitted, the
#               variable is exported as an empty string.
#
# All paths are relative to the current working directory.
#
# Only POSIX shell constructs are used so that `sh <script>` keeps working.

# Abort on the first failing command and on any use of an unset variable.
set -eu

if [ "$#" -lt 3 ]; then
  printf 'Usage: %s DATA_DIR MODEL_NAME FOLD [GPU]\n' "${0##*/}" >&2
  exit 2
fi

# shellcheck disable=SC2034  # kept so the positional interface is unchanged
DATA_DIR=$1
MODEL_NAME=$2
FOLD=$3
GPU=${4-}

if [ -z "$MODEL_NAME" ] || [ -z "$FOLD" ]; then
  printf 'error: MODEL_NAME and FOLD must be non-empty\n' >&2
  exit 2
fi

export CUDA_VISIBLE_DEVICES="$GPU"

MODEL_DIR="models/${MODEL_NAME}_native/fold_${FOLD}"
REFERENCE_DIR=../data
CHROM_SIZES="$REFERENCE_DIR/hg38.chrom.sizes"
REFERENCE_GENOME="$REFERENCE_DIR/hg38.genome.fa"
CV_SPLITS="$REFERENCE_DIR/folds/fold_${FOLD}.json"
INPUT_DATA="input_data/${MODEL_NAME}_native/input_data_train.json"
MODEL_PARAMS="input_data/${MODEL_NAME}_native/bpnet_params.json"

# Read the chromosome list up front: as a plain assignment, a failing `paste`
# (e.g. missing chroms.txt) stops the script under `set -e` instead of
# silently handing `--chroms` an empty list.
CHROMS=$(paste -s -d ' ' "$REFERENCE_DIR/chroms.txt")
if [ -z "$CHROMS" ]; then
  printf 'error: no chromosomes listed in %s\n' "$REFERENCE_DIR/chroms.txt" >&2
  exit 1
fi

mkdir -p -- "$MODEL_DIR"

# $CHROMS is intentionally unquoted: each chromosome name must reach
# bpnet-train as its own word after `--chroms`.
# shellcheck disable=SC2086
bpnet-train \
  --input-data "$INPUT_DATA" \
  --output-dir "$MODEL_DIR" \
  --reference-genome "$REFERENCE_GENOME" \
  --chroms $CHROMS \
  --chrom-sizes "$CHROM_SIZES" \
  --splits "$CV_SPLITS" \
  --model-arch-name BPNet \
  --model-arch-params-json "$MODEL_PARAMS" \
  --sequence-generator-name BPNet \
  --model-output-filename model \
  --input-seq-len 2114 \
  --output-len 1000 \
  --shuffle \
  --threads 10 \
  --epochs 100 \
  --batch-size 64 \
  --reverse-complement-augmentation \
  --early-stopping-patience 10 \
  --reduce-lr-on-plateau-patience 5 \
  --learning-rate 0.001
