From 02fe75433dd3ec86e7e3ec31b7bf51ee330371f4 Mon Sep 17 00:00:00 2001
From: Stuart Archibald <redacted>
Date: Fri, 22 Feb 2019 10:26:45 +0000
Subject: [PATCH 10/15] intel init_mkl

---
 numpy/_distributor_init.py |  27 +++++++
 numpy/_mklinitmodule.c     | 154 +++++++++++++++++++++++++++++++++++++
 numpy/setup.py             |  14 +++-
 3 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 numpy/_mklinitmodule.c

diff --git a/numpy/_distributor_init.py b/numpy/_distributor_init.py
index d893ba3..f4e623e 100644
--- a/numpy/_distributor_init.py
+++ b/numpy/_distributor_init.py
@@ -8,3 +8,30 @@ For example, this is a good place to put any checks for hardware requirements.
 The numpy standard source distribution will not put code in this file, so you
 can safely replace this file with your own version.
 """
+import sys
+
+class RTLD_for_MKL():
+    def __init__(self):
+        self.saved_rtld = None
+
+    def __enter__(self):
+        import ctypes
+        try:
+            self.saved_rtld = sys.getdlopenflags()
+            # python loads libraries with RTLD_LOCAL, but MKL requires RTLD_GLOBAL
+            # pre-load MKL with RTLD_GLOBAL before loading the native extension
+            sys.setdlopenflags(self.saved_rtld | ctypes.RTLD_GLOBAL)
+        except AttributeError:
+            pass
+        del ctypes
+
+    def __exit__(self, *args):
+        if self.saved_rtld:
+            sys.setdlopenflags(self.saved_rtld)
+            self.saved_rtld = None
+
+with RTLD_for_MKL():
+    from . import _mklinit
+
+del RTLD_for_MKL
+del sys
diff --git a/numpy/_mklinitmodule.c b/numpy/_mklinitmodule.c
new file mode 100644
index 0000000..b958333
--- /dev/null
+++ b/numpy/_mklinitmodule.c
@@ -0,0 +1,154 @@
+/* -*- c -*- */
+
+/*
+ * This is a dummy module whose purpose is to get distutils to generate the
+ * configuration files before the libraries are made.
+ */
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define NO_IMPORT_ARRAY
+
+#if (defined(USING_MKL_RT) && defined(__linux__))
+#define FORCE_PRELOADING 1
+#define _GNU_SOURCE 1
+#include <dlfcn.h>
+#include <string.h>
+#undef _GNU_SOURCE
+#endif
+
+#include <Python.h>
+#include <npy_pycompat.h>
+#include "mkl.h"
+
+static struct PyMethodDef methods[] = {
+    {NULL, NULL, 0, NULL}
+};
+
+static inline void _set_mkl_ilp64() {
+#ifdef USING_MKL_RT
+    int i = mkl_set_interface_layer(MKL_INTERFACE_ILP64);
+#endif
+    return;
+}
+
+static inline void _set_mkl_lp64() {
+#ifdef USING_MKL_RT
+    int i = mkl_set_interface_layer(MKL_INTERFACE_LP64);
+#endif
+    return;
+}
+
+static void _preload_threading_layer() {
+#if FORCE_PRELOADING
+#define VERBOSE(...) if(verbose) printf("Numpy + Intel(R) MKL: " __VA_ARGS__)
+#define SET_MTLAYER(L) do {                                      \
+            VERBOSE("setting Intel(R) MKL to use " #L " OpenMP runtime\n");  \
+            mkl_set_threading_layer(MKL_THREADING_##L);          \
+            setenv("MKL_THREADING_LAYER", #L, 0);                \
+        } while(0)
+#define PRELOAD(lib) do {                                        \
+            VERBOSE("preloading %s runtime\n", lib);             \
+            dlopen(lib, RTLD_LAZY|RTLD_GLOBAL);                  \
+        } while(0)
+    /*
+     * The following is the pseudo-code skeleton for reinterpreting unset MKL_THREADING_LAYER
+     *       
+     *       if MKL_THREADING_LAYER is empty
+     *            if kmp_calloc (or a suitable symbol identified by Terry) is loaded,
+     *                  we are using Intel (R) OpenMP, i.e. reinterpret as implicit value of INTEL
+     *            otherwise check if other Open MP is loaded by checking get_omp_num_threads symbol
+     *                  if not loaded: 
+     *                         assume INTEL, and force loading of IOMP5
+     *                  if loaded:
+     *                         if Gnu OMP, set MKL_THREADING_LAYER=GNU, and call set_mkl_threading_layer(MKL_THREADING_GNU)
+     *                         if other vendors?
+     *       if MKL_THREADING_LAYER is INTEL
+     *             force loading of iomp, to preempt possibility of other modules loading other OMP library before MKL is actually used
+     *
+     *       should we treat other possible values of MKL_THREADING_LAYER specially?
+     *
+     */
+
+    const char *libiomp = "libiomp5.so";
+    const char *verbose = getenv("MKL_VERBOSE");
+    const char *mtlayer = getenv("MKL_THREADING_LAYER");
+    void *omp = dlsym(RTLD_DEFAULT, "omp_get_num_threads");
+    const char *omp_name = "(unidentified)";
+    const char *iomp = NULL; /* non-zero indicates Intel OpenMP is loaded */
+    Dl_info omp_info;
+
+    if(verbose && (verbose[0] == 0 || atoi(verbose) == 0))
+        verbose = NULL;
+
+    VERBOSE("THREADING LAYER: %s\n", mtlayer);
+
+    if(omp) {
+        if(dladdr(omp, &omp_info)) {
+            omp_name = basename(omp_info.dli_fname); /* GNU version doesn't modify argument */
+            iomp = strstr(omp_name, libiomp);
+        }
+        VERBOSE("%s OpenMP runtime %s is already loaded\n", iomp?"Intel(R)":"Other vendor", omp_name);
+    }
+    if(!mtlayer || mtlayer[0] == 0) {                /* unset or empty */
+      if(omp) {                                      /* if OpenMP runtime is loaded */
+        if(iomp)                                     /* if Intel runtime is loaded */
+            SET_MTLAYER(INTEL);
+        else                                         /* otherwise, assume it is GNU OpenMP */
+            SET_MTLAYER(GNU);
+      } else {                                       /* nothing is loaded */
+          SET_MTLAYER(INTEL);
+          PRELOAD(libiomp);
+      }
+    } else if(strcasecmp(mtlayer, "intel") == 0) {   /* Intel runtime is requested */
+        if(omp && !iomp) {
+            fprintf(stderr, "Error: Numpy + Intel(R) MKL: MKL_THREADING_LAYER=INTEL is incompatible with %s library."
+                            "\n\tTry to import numpy first or set the threading layer accordingly. "
+                            "Set NPY_MKL_FORCE_INTEL to force it.\n", omp_name);
+            if(!getenv("NPY_MKL_FORCE_INTEL"))
+                exit(1);
+        } else
+            PRELOAD(libiomp);
+    }
+#endif
+    return;
+}
+
+static inline void _set_mkl_interface() {
+    _set_mkl_lp64();
+    _preload_threading_layer();
+}
+
+#if defined(NPY_PY3K)
+static struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT,
+    "mklinit",
+    NULL,
+    -1,
+    methods,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+#endif
+
+/* Initialization function for the module */
+#if defined(NPY_PY3K)
+PyMODINIT_FUNC PyInit__mklinit(void) {
+    PyObject *m;
+
+    _set_mkl_interface();
+    m = PyModule_Create(&moduledef);
+    if (!m) {
+        return NULL;
+    }
+
+    return m;
+}
+#else
+PyMODINIT_FUNC
+init_mklinit(void) {
+    _set_mkl_interface();
+    Py_InitModule("_mklinit", methods);
+}
+#endif
diff --git a/numpy/setup.py b/numpy/setup.py
index 4ccdaee..3d06cf6 100644
--- a/numpy/setup.py
+++ b/numpy/setup.py
@@ -4,8 +4,19 @@ from __future__ import division, print_function
 
 def configuration(parent_package='',top_path=None):
     from numpy.distutils.misc_util import Configuration
-    config = Configuration('numpy', parent_package, top_path)
+    from numpy.distutils.system_info import get_info
+
+    defs = []
+    libs = get_info('mkl').get('libraries', [])
+    if any(['mkl_rt' in li for li in libs]):
+        #libs += ['dl'] - by default on Linux
+        defs += [('USING_MKL_RT', None)]
 
+    config = Configuration('numpy', parent_package, top_path)
+    config.add_extension('_mklinit',
+                          sources=['_mklinitmodule.c'],
+                          define_macros=defs,
+                          extra_info=get_info('mkl'))
     config.add_subpackage('compat')
     config.add_subpackage('core')
     config.add_subpackage('distutils')
@@ -24,5 +35,6 @@ def configuration(parent_package='',top_path=None):
     config.make_config_py() # installs __config__.py
     return config
 
+
 if __name__ == '__main__':
     print('This is the wrong setup.py file to run')
-- 
2.20.1

