{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['entity:production_experiment_id', 'predictions_and_metrics_chrM_production_run_1_encode_fold0'], dtype='object')"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import subprocess as sp\n",
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "\n",
    "experiments_pd = pd.read_csv(\"/oak/stanford/groups/akundaje/vir/tfatlas/other_data/chrM_predictions/fold0/production_experiment.tsv\",sep=\"\\t\")\n",
    "experiments_pd.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_file(download_url,download_location = '.'):\n",
    "    comm = [\"~/google-cloud-sdk/bin/gsutil\"]\n",
    "    comm += [\"cp\", \"-n\"]\n",
    "    comm += [download_url]\n",
    "    comm += [download_location]\n",
    "\n",
    "    proc = sp.Popen(\" \".join(comm),stderr=sp.PIPE,shell=True)\n",
    "    return proc.communicate()\n",
    "\n",
    "def download_prediction(experiment_name):\n",
    "    experiment = experiments_pd[experiments_pd['entity:production_experiment_id']==experiment_name].reset_index(drop=True)\n",
    "    if not os.path.exists(f\"/oak/stanford/groups/akundaje/vir/tfatlas/other_data/chrM_predictions/fold0/\"+experiment[\"entity:production_experiment_id\"][0]):\n",
    "        os.mkdir(f\"//oak/stanford/groups/akundaje/vir/tfatlas/other_data/chrM_predictions/fold0/\"+experiment[\"entity:production_experiment_id\"][0])\n",
    "    try:\n",
    "        for prediction_url in experiment['predictions_and_metrics_chrM_production_run_1_encode_fold0'][0].replace('\"', \"\").replace('[', \"\").replace(']', \"\").split(','):\n",
    "            results = download_file(prediction_url,f\"//oak/stanford/groups/akundaje/vir/tfatlas/other_data/chrM_predictions/fold0/\"+experiment[\"entity:production_experiment_id\"][0])\n",
    "    except:\n",
    "        print(\"failed:\",experiment[\"entity:production_experiment_id\"][0])\n",
    "        \n",
    "from multiprocessing import Pool\n",
    "with Pool(20) as p:\n",
    "    p.map(download_prediction, experiments_pd['entity:production_experiment_id'].tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "basepairmodels_latest",
   "language": "python",
   "name": "basepairmodels_latest"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
