#!/usr/bin/env bash
#
# ncbiPartitioned.sh - write the index page and the combined order/stats
# files for one partitioned RefSeq assembly-hub hierarchy.
#
# usage: ncbiPartitioned.sh <viral|bacteria>
#
# Files written, relative to ${topDir}:
#   <subType>/<subType>.ncbi.html         index page for the hierarchy
#   <subType>.order.tab                   all of <subType>/??/*.order.tab
#   <subType>/<subType>.total.stats.txt   all of <subType>/??/*.total.stats.txt
#
# Counts noted by the original author (presumably the number of assemblies
# in each RefSeq group at the time of writing -- TODO confirm):
#      63 plant
#      71 protozoa
#      83 invertebrate
#      86 vertebrate_other
#      94 vertebrate_mammalian
#     174 fungi
#     449 archaea
#    4918 viral
#   44701 bacteria
#
# NOTE(review): this copy of the script appears to have lost everything that
# was written between '<' and '>' characters: the HTML tags of the index
# page and, most likely, the code that splits the hierarchy into the
# two-character partition directories and emits one index entry per
# partition.  Only what is visible is reproduced below; restore the missing
# markup and partitioning step from the canonical script before relying on
# the generated page.

set -beEu -o pipefail

# Print the command-line synopsis to stderr.
function usage() {
  printf "usage: ncbiPartitioned.sh <viral|bacteria>\n" 1>&2
  printf "select viral or bacteria hierarchies to partition\n" 1>&2
}

# Exactly one argument is required.
if [[ $# -ne 1 ]]; then
  usage
  exit 255
fi

export subType=$1

# Only these two hierarchies are handled by this script.
if [[ "${subType}" != "viral" && "${subType}" != "bacteria" ]]; then
  printf "ERROR: select viral or bacteria assemblies update\n" 1>&2
  usage
  exit 255
fi

printf "running subType: %s\n" "${subType}" 1>&2

# Human-readable name of each hierarchy, used in the index page text.
declare -A pageTitles
pageTitles=(
  ["bacteria"]="Bacterial"
  ["viral"]="Viral"
)
pageTitle="${pageTitles["${subType}"]}"

# 'inside' and 'dirLimit' are exported but not referenced in the visible
# code; they are kept in case a child process or the missing partitioning
# step reads them.
export inside="/hive/data/inside/ncbi/genomes/refseq"
export topDir="/hive/data/inside/ncbi/genomes/refseq/hubs"
export dirLimit=1000

for groupName in "${subType}"
do
  cd "${topDir}"
  partsIndex="${topDir}/${groupName}/${subType}.ncbi.html"

  # Start from a clean file: the page is built by appending.
  rm -f "${partsIndex}"

  # NOTE(review): the '%02d %s to %s' line is a printf-style format that is
  # written out literally here.  It looks like the per-partition entry
  # (number, first name, last name) whose generating loop is not present in
  # this copy -- restore that loop rather than shipping the literal text.
  cat >> "${partsIndex}" <<EOF
${pageTitle} assembly hubs

${pageTitle} assembly hub

${pageTitle} genome assembly hubs

Each link is a set of about 1,000 genomes in an assembly hub

%02d %s to %s

EOF

  # Presumably the execute bit marks the page as server-parsed for the web
  # server (e.g. Apache XBitHack) -- TODO confirm.
  chmod +x "${partsIndex}"
done  # for groupName in ${subType}

# Merge the per-partition listings into hierarchy-wide files.  The ??
# glob matches the two-character partition directories; if nothing matches,
# cat fails on the literal pattern and 'set -e' aborts the script.
cd "${topDir}"
cat "${subType}"/??/*.order.tab > "${subType}.order.tab"
cat "${subType}"/??/*.total.stats.txt > "${subType}/${subType}.total.stats.txt"

exit $?