Lunch Time Python¶
Lunch 6: numba¶
numba is a just-in-time (JIT) compiler for Python. With a few simple annotations, array-oriented and math-heavy Python code can be just-in-time compiled to performance similar to that of C, C++, and Fortran, without having to switch languages or Python interpreters.
Lunch Time Python, Scientific Software Center, Heidelberg University
Motivation¶
- There are many reasons to use Python, but performance is not one of them
- What to do when a Python function is too slow?
  - Ideally, find a library (e.g. numpy) with an equivalent function
  - Otherwise:
    - use PyPy instead of CPython (if all your libraries are available)
    - write a Fortran function and compile with f2py or fortranmagic
    - write a C function and compile with Cython
    - write a C++ function and compile using pybind11 or ipybind
    - magically make your slow Python function faster (numba)
numba installation¶
- Conda: `conda install numba`
- Pip: `python -m pip install numba`
Vector reduction example¶
Toy example: implement a vector reduction operation:
$r(x, y) = \sum_i \cos(x_i) \sin(y_i)$
Some random vectors to benchmark our functions:
In [1]:
import numpy as np
x = np.random.uniform(low=-1, high=1, size=5000000)
y = np.random.uniform(low=-1, high=1, size=5000000)
Python¶
In [2]:
import math
def r_python(x_vec, y_vec):
    s = 0
    for x, y in zip(x_vec, y_vec):
        s += math.cos(x) * math.sin(y)
    return s
In [3]:
r_python(x, y)
Out[3]:
1693.484794895606
In [4]:
%timeit r_python(x,y)
752 ms ± 4.19 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
numpy¶
In [5]:
def r_numpy(x_vec, y_vec):
    return np.dot(np.cos(x_vec), np.sin(y_vec))
In [6]:
r_numpy(x, y)
Out[6]:
1693.4847948957686
In [7]:
%timeit r_numpy(x,y)
132 ms ± 360 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Cython¶
In [8]:
# pip install cython
%load_ext cython
In [9]:
%%cython
import math
import math

def r_cython(x_vec, y_vec):
    s = 0
    for x, y in zip(x_vec, y_vec):
        s += math.cos(x) * math.sin(y)
    return s
In [10]:
r_cython(x, y)
Out[10]:
1693.484794895606
In [11]:
%timeit r_cython(x,y)
890 ms ± 4.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In [12]:
%%cython
import math
# use C math functions
from libc.math cimport sin, cos
# use C types instead of Python types
def r_cython(double[:] x_vec, double[:] y_vec):
    cdef double s = 0
    cdef int i
    for i in range(len(x_vec)):
        s += cos(x_vec[i]) * sin(y_vec[i])
    return s
In [13]:
r_cython(x, y)
Out[13]:
1693.484794895606
In [14]:
%timeit r_cython(x,y)
102 ms ± 146 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Fortran¶
In [15]:
if "google.colab" in str(get_ipython()):
!pip install fortran-magic -qqq
%load_ext fortranmagic
In [16]:
%%fortran
subroutine r_fortran(x_vec, y_vec, res)
    real, intent(in) :: x_vec(:), y_vec(:)
    real, intent(out) :: res
    integer :: i, n
    n = size(x_vec)
    res = 0
    do i = 1, n
        res = res + cos(x_vec(i)) * sin(y_vec(i))
    enddo
end subroutine r_fortran
In [17]:
r_fortran(x, y)
Out[17]:
1693.455322265625
In [18]:
%timeit r_fortran(x,y)
61.5 ms ± 136 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
C++ / pybind11¶
In [19]:
if "google.colab" in str(get_ipython()):
!pip install git+https://github.com/aldanor/ipybind.git -qqq
%load_ext ipybind
In [20]:
%%pybind11
#include <pybind11/numpy.h>
#include <cmath>

PYBIND11_PLUGIN(example) {
    py::module m("example");
    m.def("r_pybind", [](const py::array_t<double>& x, const py::array_t<double>& y) {
        double sum{0};
        auto rx{x.unchecked<1>()};
        auto ry{y.unchecked<1>()};
        for (py::ssize_t i = 0; i < rx.shape(0); i++) {
            sum += std::cos(rx[i]) * std::sin(ry[i]);
        }
        return sum;
    });
    return m.ptr();
}
In [21]:
r_pybind(x, y)
Out[21]:
1693.484794895606
In [22]:
%timeit r_pybind(x, y)
98.6 ms ± 68.6 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
numba¶
In [23]:
from numba import jit
@jit
def r_numba(x_vec, y_vec):
    s = 0
    for x, y in zip(x_vec, y_vec):
        s += math.cos(x) * math.sin(y)
    return s
In [24]:
r_numba(x, y)
Out[24]:
1693.484794895606
In [25]:
# pure python with numba JIT
%timeit r_numba(x,y)
101 ms ± 117 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Numba compilation¶
Two compilation modes:

- `nopython` mode (default)
  - Fast because it doesn't access the Python C API
  - Needs to be able to infer the native (C) types of all values
- `object` mode (fallback)
  - Slow because it uses Python objects and the Python C API
  - Only used if `nopython` mode is not possible
  - To raise an error instead of falling back to this, set `nopython=True` or use `@njit` (see the sketch below)
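A minimal sketch of the difference, assuming a plain Python class `Box` (hypothetical, not from the talk) whose instances numba cannot infer a native type for:

from numba import njit
from numba.core.errors import TypingError

class Box:  # hypothetical plain Python class: no native (C) type to infer
    def __init__(self, value):
        self.value = value

@njit  # shorthand for @jit(nopython=True): no object mode fallback
def unwrap(b):
    return b.value

try:
    unwrap(Box(1.0))
except TypingError:
    print("nopython mode cannot compile unwrap() for a Box argument")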
Numba function signatures¶
You can optionally explicitly specify the function signature. Use cases:
- you want the function to be compiled when it is defined rather than when it is first called
- you need fine-grained control over types (e.g. if you want 32-bit floats)
In [26]:
from numba import float32
@jit(float32(float32, float32))
def sum(a, b):
    return a + b
In [27]:
sum(1, 0.99999999)
Out[27]:
2.0
(in 32-bit precision, 0.99999999 rounds to 1.0, hence the result 2.0)
Numba options¶
- `nopython=True`: disable object mode fallback
- `nogil=True`: release the Python Global Interpreter Lock (GIL)
- `cache=True`: cache the compiled functions on disk
- `parallel=True`: enable automatic parallelization
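These options can be combined; a minimal sketch, with `dot` as a hypothetical example function (not from the talk):

from numba import jit

@jit(nopython=True, nogil=True, cache=True)
def dot(a, b):
    # compiled in nopython mode, runs with the GIL released,
    # and the compiled machine code is cached on disk between sessions
    s = 0.0
    for i in range(len(a)):
        s += a[i] * b[i]
    return s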
Parallelization¶
- set the `parallel=True` option to enable automatic parallelization
- use `prange` to explicitly parallelize a loop over a range
In [28]:
from numba import jit, prange
@jit(parallel=True)
def r_numba(x_vec, y_vec):
    s = 0
    for i in prange(len(x_vec)):
        s += math.cos(x_vec[i]) * math.sin(y_vec[i])
    return s
In [29]:
r_numba(x, y)
Out[29]:
1693.4847948957886
In [30]:
%timeit r_numba(x,y)
34.2 ms ± 359 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
NumPy universal functions¶
- a numpy `ufunc` is a function that operates on scalars
- can create one using `@numba.vectorize` and use it like built-in numpy ufuncs
In [31]:
from numba import vectorize, float64
@vectorize([float64(float64, float64)], target="parallel")
def r(x, y):
    return np.cos(x) * np.sin(y)
In [32]:
r(2, 3)
Out[32]:
-0.05872664492762098
In [33]:
r(x, y)
Out[33]:
array([ 0.00958956, 0.01427726, -0.20416025, ..., 0.56578324, 0.55769682, -0.15520663])
In [34]:
np.sum(r(x, y))
Out[34]:
1693.4847948957656
In [35]:
%timeit np.sum(r(x,y))
55.3 ms ± 476 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)
Advanced features¶
- Ahead of Time (AoT) compilation
  - the compiled module only depends on NumPy
- Flexible specializations
  - `@generated_jit` decorator for compile-time logic, e.g. type specializations
- Stencil
  - `@stencil` decorator for creating a stencil to apply to an array
- C callbacks
  - `@cfunc` decorator to generate a C callback (e.g. to pass to scipy.integrate, as sketched below)
- CUDA support
  - compile CUDA kernels to run on a GPU
- see numba.readthedocs.io for more
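As an illustration of the C-callback feature, a minimal sketch following the pattern in the numba documentation (the integrand itself is a made-up example; scipy.integrate.quad accepts a ctypes function pointer in place of a Python callable):

import numpy as np
from numba import cfunc, float64
from scipy.integrate import quad

@cfunc(float64(float64))  # compile to a C function with signature double(double)
def integrand(t):
    return np.exp(-t) / t**2

# quad calls the compiled C function directly via its ctypes pointer,
# avoiding a Python-level function call per evaluation point
result, error = quad(integrand.ctypes, 1, np.inf)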