Prerequisites: This material assumes basic knowledge of the Open Data Cube, Xarray and numerical processing using numpy.
The Open Data Cube library is written in Python and makes extensive use of scientific and geospatial libraries. For the purposes of this tutorial we will primarily consider five libraries:
- datacube - EO datacube
- xarray - labelled arrays
- dask & distributed - distributed parallel programming
- numpy - numerical array processing with vectorisation
- numba - a library for high performance python

Whilst the interrelations are intimate it is useful to conceptualise them according to their primary role and how these roles build from low level numerical array processing (numpy) through to high-level EO datacube semantics (datacube and xarray). If you prefer, viewed from top to bottom we can say:
- datacube.load() does the necessary file IO and data manipulation to construct a...
- xarray which will be labelled with the necessary coordinate systems and band names and made up of...
- dask.arrays which contain many chunks which are...
- numpy arrays containing the actual data values.

Each higher level of abstraction thus builds on the lower level components that perform the actual storage and computation.
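As a small sketch of this layering (built by hand here, without a datacube connection; the dimension names and sizes are illustrative only), we can construct the stack ourselves and inspect each level:

import numpy as np
import dask.array as da
import xarray as xr

# dask.array splits a large array into chunks; each chunk is a numpy array
data = da.random.random((4, 1000, 1000), chunks=(1, 500, 500))

# xarray adds labelled dimensions and coordinates on top of the dask array
cube = xr.DataArray(data, dims=("time", "y", "x"), coords={"time": np.arange(4)}, name="example_band")

print(type(cube))        # xarray DataArray
print(type(cube.data))   # dask Array
print(cube.data.chunks)  # the numpy-backed chunks that hold the actual values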
Overlaid on this are libraries like numba, dask and distributed that provide computational components that can accelerate and distribute processing across multiple compute cores and computers. The use of dask, distributed and numba is optional - not all applications require the additional complexity of these tools.
Achieving performance and scale requires an understanding of the performance of each library and how it interacts with the others. Moreover, and often counterintuitively, adding more compute cores to a problem may not make it faster; in fact it may slow down (as well as waste resources). Added to this is a deceptive simplicity: some of the tools can be turned on with only a few code changes, and significant performance increases can be achieved.
However, as the application is scaled or an alternative algorithm is used, further challenges may arise - unexpected I/O or compute inefficiencies - that require code refactors and changes in algorithmic approach. These challenges can seem to undo some of the earlier work and be frustrating to address.
The good news is whilst there is complexity (six interrelated libraries mentioned so far), there are common concepts and techniques involved in analysing how to optimise your algorithm. If you know from the start your application is going to require scale, then it does help to think in advance where you are heading.
This course will equip readers with concepts and techniques they can utilise in their algorithm and workflow development. It will use computer science terms and a variety of libraries, but won't discuss these in detail in order to keep things concise. The focus will be on demonstration by example and on analysis techniques that identify where to focus effort. The reader is encouraged to use their favourite search engine to dig deeper when needed; there are a lot of tutorials online!
One last thing, in order to maintain a healthy state of mind for "Dask and ODC", the reader is encouraged to hold both of these truths in mind at the same time:
Yep, that's contradictory! By the end of this course, and a couple of your own adventures, you will understand why.
# EASI defaults
import git
import sys
import os
os.environ['USE_PYGEOS'] = '0'
repo = git.Repo('.', search_parent_directories=True).working_tree_dir
if repo not in sys.path: sys.path.append(repo)
from easi_tools import EasiDefaults, notebook_utils
easi = EasiDefaults()
Successfully found configuration for deployment "csiro"
In this section we will explore Python performance for array processing. Python itself, as you will soon see, is quite slow. It is, however, highly expressive and can orchestrate more complex and faster libraries of numerical code (e.g. numpy). Python is also amenable to being accelerated (e.g. using numba) and made to run on multiple CPU cores (e.g. via dask).
list and numpy

Let's take a look at the simple addition of two arrays. In Python the nearest data type to an array is a list of numbers. This will be our starting point.
Our focus is on performance so we'll use the Jupyter %%time and %%timeit magics to run our cells and time their execution. The latter will run the cell multiple times and provide us with more representative statistics of performance and variability.
First, in pure Python using lists:
size_of_vec = 2000*2000
X_list = range(size_of_vec)
Y_list = range(size_of_vec)
%%timeit -n 10 -r 1
Z = [X_list[i] + Y_list[i] for i in range(len(X_list)) ]
967 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 10 loops each)
Now the same processing using numpy.
import numpy
X = numpy.arange(size_of_vec)
Y = numpy.arange(size_of_vec)
%%timeit -n 10 -r 1
Z = X + Y
8.93 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 10 loops each)
Let's check that the two arrays are identical (note that %%timeit runs its cell in a temporary scope, so the variables timed above are not available here and we reconstruct the arrays).
print("Are the arrays identical?")
([X_list[i] + Y_list[i] for i in range(len(X_list)) ] == X + Y).all()
Are the arrays identical?
True
At least two orders of magnitude in performance improvement!
Why? numpy provides a Python interface to an underlying C array library that makes use of CPU vectorisation - this allows it to process several add operations at the same time.
numpy isn't the only library that does this type of wrapping over a fast optimised library. There are, for example:

- CuPy, which uses GPUs for array processing
- tensorflow, which uses both CPU and GPU optimisations for machine learning
- datashader, for large dataset visualisation

It's a very long list, and thanks to a great deal of work by a great many software engineers most of these libraries will work together efficiently.
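As a minimal sketch only (it assumes cupy is installed and a CUDA GPU is available, which may not be the case in your environment), the earlier addition looks almost identical in CuPy:

import cupy as cp

# the same element-wise addition as the numpy example, executed on the GPU
X_gpu = cp.arange(2000 * 2000)
Y_gpu = cp.arange(2000 * 2000)
Z_gpu = X_gpu + Y_gpu

Z = cp.asnumpy(Z_gpu)   # copy the result back to host memory as a numpy array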
Tip: Where possible use high performance libraries that have python wrappers.
The reader will have noticed the change in abstraction. The pure Python version used list comprehension syntax to add the two arrays, while numpy used a much shorter direct addition syntax, more in keeping with the mathematics involved. This change in abstraction is seen in most libraries, including the ODC library where datacube.load() is shorthand for a complex process of data discovery, reprojection, fusing and array construction. High-level abstractions like this are powerful and greatly simplify development (the good). They can also hide performance bottlenecks and challenges (the bad).
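As a hedged sketch of that abstraction (the product name, extents and resolution below are hypothetical and require a matching ODC index to resolve), a single datacube.load() call encapsulates all of those steps:

import datacube

dc = datacube.Datacube(app="performance-tutorial")

# one call hides dataset discovery, reprojection, fusing and array construction
# (the product and query values here are illustrative only)
data = dc.load(
    product="example_surface_reflectance",
    x=(149.0, 149.2),
    y=(-35.4, -35.2),
    time=("2021-01-01", "2021-03-31"),
    output_crs="EPSG:3577",
    resolution=(-30, 30),
    dask_chunks={"time": 1, "x": 1024, "y": 1024},
)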
Tip: Use high level API abstractions but be mindful of their use.
Numba - accelerating Python

So high performance libraries rock, but what if you don't have one for your purpose and you're back in Python?

Numba translates Python functions into optimized machine code at runtime - https://numba.pydata.org.
Let's see how this works. This time a more complex example: apply a smoothing function over our (random) image, perform an FFT, and save the result. These examples are (very) slightly modified versions of those in the High Performance Python Processing Pipeline video by Matthew Rocklin. It's such a good introduction it's worth repeating.
We'll also use the tqdm library to provide a progress bar.
import numpy as np
from tqdm.notebook import tqdm
def load_eo_data():
    return np.random.random((1000, 1000))

def smooth(x):
    out = np.empty_like(x)
    for i in range(1, x.shape[0] - 1):
        for j in range(1, x.shape[1] - 1):
            out[i, j] = (x[i + -1, j + -1] + x[i + -1, j + 0] + x[i + -1, j + 1] +
                         x[i + 0, j + -1] + x[i + 0, j + 0] + x[i + 0, j + 1] +
                         x[i + 1, j + -1] + x[i + 1, j + 0] + x[i + 1, j + 1]) // 9
    return out

def save(x, filename):
    pass
%%time
for i in tqdm(range(5)):
    img = load_eo_data()
    img = smooth(img)
    img = np.fft.fft2(img)
    save(img, "file-" + str(i) + "-.dat")
0%| | 0/5 [00:00<?, ?it/s]
CPU times: user 10.7 s, sys: 11.1 ms, total: 10.7 s Wall time: 10.7 s
The smooth(x) function contains two Python loops. Now we could (and would) find a similar high performance library with a smooth(x) function, but for this example let's use numba's jit compiler to translate the Python function into optimized machine code at runtime.
import numba
fast_smooth = numba.jit(smooth)
%%time
for i in tqdm(range(5)):
    img = load_eo_data()
    img = fast_smooth(img)
    img = np.fft.fft2(img)
    save(img, "file-" + str(i) + "-.dat")
0%| | 0/5 [00:00<?, ?it/s]
CPU times: user 1.45 s, sys: 19.7 ms, total: 1.47 s Wall time: 1.46 s
Just a bit quicker! Much of the time in the first run was numba performing compilation. Run the cell above again and you'll find it runs faster the second time.
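As a small sketch of where that time goes, we can time the first and second calls of a freshly jitted copy of smooth (reusing the functions defined above):

import time

fresh_smooth = numba.jit(smooth)   # a fresh wrapper, so the first call triggers compilation
img = load_eo_data()

t0 = time.perf_counter()
fresh_smooth(img)                  # first call: compile, then run
t1 = time.perf_counter()
fresh_smooth(img)                  # second call: run the already-compiled code
t2 = time.perf_counter()

print("first call (includes compilation):", round(t1 - t0, 2), "s")
print("second call (compiled only):", round(t2 - t1, 4), "s")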
The recommended approach to have numba compile a Python function is to use Python decorator syntax (@numba.jit). So the original code now looks like this (single line changed) and we can call smooth(x) without having to create fast_smooth:
import numpy as np
def load_eo_data():
    return np.random.random((1000, 1000))

@numba.jit
def smooth(x):
    out = np.empty_like(x)
    for i in range(1, x.shape[0] - 1):
        for j in range(1, x.shape[1] - 1):
            out[i, j] = (x[i + -1, j + -1] + x[i + -1, j + 0] + x[i + -1, j + 1] +
                         x[i + 0, j + -1] + x[i + 0, j + 0] + x[i + 0, j + 1] +
                         x[i + 1, j + -1] + x[i + 1, j + 0] + x[i + 1, j + 1]) // 9
    return out

def save(x, filename):
    pass
%%time
for i in tqdm(range(5)):
    img = load_eo_data()
    img = smooth(img)
    img = np.fft.fft2(img)
    save(img, "file-" + str(i) + "-.dat")
0%| | 0/5 [00:00<?, ?it/s]
CPU times: user 1.16 s, sys: 4.12 ms, total: 1.16 s Wall time: 1.16 s
Why not use numba all the time everywhere?

Like most high level abstractions, numba makes assumptions about code, only accelerates a subset of Python libraries (not all numpy functions are available via numba), and it is entirely possible it can make performance worse or not work at all!
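As a small illustration of that last point (a sketch, not part of the original pipeline): numba's nopython mode cannot type calls into libraries it does not understand, such as pandas, and raises a TypingError when the function is first called:

import numpy as np
import pandas as pd
import numba

@numba.njit
def unsupported_mean(x):
    # pandas is not supported inside nopython-mode numba functions
    return pd.Series(x).mean()

try:
    unsupported_mean(np.arange(10.0))
except numba.core.errors.TypingError:
    print("numba could not compile this function")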
There's one additional consideration. If you've run all the cells to this point in order, try running the fast_smooth cell again, repeated below for convenience:
fast_smooth = numba.jit(smooth)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Cell In[14], line 1 ----> 1 fast_smooth = numba.jit(smooth) File /env/lib/python3.10/site-packages/numba/core/decorators.py:179, in jit(signature_or_function, locals, cache, pipeline_class, boundscheck, **options) 176 wrapper = _jit(sigs, locals=locals, target=target, cache=cache, 177 targetoptions=options, **dispatcher_args) 178 if pyfunc is not None: --> 179 return wrapper(pyfunc) 180 else: 181 return wrapper File /env/lib/python3.10/site-packages/numba/core/decorators.py:191, in _jit.<locals>.wrapper(func) 189 def wrapper(func): 190 if extending.is_jitted(func): --> 191 raise TypeError( 192 "A jit decorator was called on an already jitted function " 193 f"{func}. If trying to access the original python " 194 f"function, use the {func}.py_func attribute." 195 ) 197 if not inspect.isfunction(func): 198 raise TypeError( 199 "The decorated object is not a function (got type " 200 f"{type(func)})." 201 ) TypeError: A jit decorator was called on an already jitted function CPUDispatcher(<function smooth at 0x7f7c3b220700>). If trying to access the original python function, use the CPUDispatcher(<function smooth at 0x7f7c3b220700>).py_func attribute.
Error!

The smooth function was decorated so is already jit-compiled. Attempting to jit it again causes this error, and exposes some of the low level changes behind the abstraction. This can make debugging code difficult if you are not mindful of what is occurring.
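As the error message itself suggests, the original Python function remains available via the .py_func attribute, so (as a small sketch) we can recover it or re-jit it from there:

import numba

original_smooth = smooth.py_func            # the plain, un-compiled Python function
fast_smooth = numba.jit(original_smooth)    # jit the original rather than the already-jitted wrapper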
TIP: Use high level API abstractions but be mindful of their use
Our fake EO processing pipeline only has 5 images and takes about 1 second to run. In practice we'll have 1000s of images to process (if not more).

Let's repeat our example code but now with more iterations. You can understand why we use the tqdm library to provide a progress bar for these larger scale examples, rather than printing out each iteration number or staring at a blank screen wondering if it works!
import numpy as np
import numba
from tqdm.notebook import tqdm
def load_eo_data():
    return np.random.random((1000, 1000))

@numba.jit
def smooth(x):
    out = np.empty_like(x)
    for i in range(1, x.shape[0] - 1):
        for j in range(1, x.shape[1] - 1):
            out[i, j] = (x[i + -1, j + -1] + x[i + -1, j + 0] + x[i + -1, j + 1] +
                         x[i + 0, j + -1] + x[i + 0, j + 0] + x[i + 0, j + 1] +
                         x[i + 1, j + -1] + x[i + 1, j + 0] + x[i + 1, j + 1]) // 9
    return out

def save(x, filename):
    pass
Before running the next code, open a terminal window (File > New > Terminal) and run htop at the command line to show current CPU usage per core.

TIP: Drag your terminal window so that it sits below this notebook before you run htop, to see both windows at the same time.
%%time
for i in tqdm(range(1000)):
    img = load_eo_data()
    img = smooth(img)
    img = np.fft.fft2(img)
    save(img, "file-" + str(i) + "-.dat")
0%| | 0/1000 [00:00<?, ?it/s]
CPU times: user 45.3 s, sys: 60.3 ms, total: 45.3 s Wall time: 45.2 s
You'll notice that only one core is showing any load. The above code is not using any of the additional cores. dask can be useful in this scenario, even on a local machine.
Firstly, a few notes on terminology. A Dask Cluster is comprised of a client, a scheduler, and workers. These terms will be used throughout this tutorial. Figure 1 below shows the relationship between each of these components. The client submits tasks to the scheduler, which decides how to submit the tasks to individual workers. During this process, the scheduler creates what is called a Task Graph. This is essentially a map of the tasks that need to be carried out. Figure 2 shows an example of a simple task graph (see https://docs.dask.org/en/stable/graphs.html for more information). Workers carry out the actual calculations and either store the results or send them back to the client.
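As a small sketch of a task graph (using dask.delayed; the functions here are illustrative only), two independent "load" tasks feed a "combine" task, and nothing runs until compute() is called:

import dask

@dask.delayed
def load(i):
    # illustrative task: pretend to load item i
    return i * 10

@dask.delayed
def combine(a, b):
    # illustrative task: combine two loaded items
    return a + b

result = combine(load(1), load(2))   # builds a small task graph; nothing has run yet
print(result.compute())              # the scheduler walks the graph and executes each task
# result.visualize("graph.png")      # optionally render the task graph (requires graphviz)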
Dask has several core data types, including Dask DataFrames and Dask Arrays. Essentially, Dask DataFrames are parallelized Pandas DataFrames (Figure 3) and Dask Arrays are parallelized Numpy arrays (Figure 4).
Dask Array implements a subset of the NumPy ndarray interface using blocked algorithms, cutting up the large array into many small arrays. EASI and the Open Data Cube primarily make use of Dask Arrays (wrapped in xarray DataArrays). For more information see https://tutorial.dask.org/02_array.html.
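A quick sketch of that blocking in action (the array size and chunk size here are arbitrary):

import dask.array as da

# a 4000 x 4000 array split into 4 x 4 = 16 numpy chunks of 1000 x 1000 each
x = da.random.random((4000, 4000), chunks=(1000, 1000))
print(x.chunks)             # the chunk layout along each dimension
print(x.mean().compute())   # the mean is computed chunk-by-chunk and then combined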
Let's start by creating a local dask cluster.
from dask.distributed import Client, LocalCluster, fire_and_forget
cluster = LocalCluster()
client = Client(cluster)
client
Client: Client-df937c31-040d-11ee-8609-6e0d79a27f15
Connection method: Cluster object | Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status
LocalCluster: 4 workers | 8 total threads | 24.00 GiB total memory | Status: running | Using processes: True
Scheduler-86d5d12d-261c-46a3-9cb5-32a253981087 | Comm: tcp://127.0.0.1:46625
Workers: 4, each with 2 threads and 6.00 GiB memory
There is also a utility function in notebook_utils which you can use to initialize a local dask cluster. This function can also be used to initialize a remote cluster using Dask Gateway, but please complete the other Dask tutorials before using a remote cluster.

The following lines will initialize and display a local dask cluster and can replace the code above.
cluster, client = notebook_utils.initialize_dask(use_gateway=False, wait=True)
display(cluster if cluster else client)
The Dask Dashboard url will show as "localhost" or "127.0.0.1" since it's running locally in the Jupyter kernel. The dashboard can be accessed via the Jupyter server proxy using the following url:
notebook_utils.localcluster_dashboard(client=client, server=easi.hub)
'https://hub.csiro.easi-eo.solutions/user/csiro-csiro-aad_pag064@csiro.au/proxy/8787/status'
You will want to have this open when running the next cell.

We modify our application to submit functions to run on the dask cluster:
%%time
for i in tqdm(range(1000)):
    img = client.submit(load_eo_data, pure=False)
    img = client.submit(smooth, img)
    img = client.submit(np.fft.fft2, img)
    future = client.submit(save, img, "file-" + str(i) + "-.dat")
    fire_and_forget(future)
0%| | 0/1000 [00:00<?, ?it/s]
CPU times: user 19.8 s, sys: 544 ms, total: 20.3 s Wall time: 19.8 s
If you watch htop in the terminal you'll see all cores become active. The dask dashboard will also provide a view of the tasks being run in parallel.
You will also see that the cluster remains busy after the cell above finishes. This is because Dask is working in the background processing in parallel the tasks that have been submitted to it.
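If you need to know when the submitted work has actually finished (rather than fire-and-forget), one option is to keep the futures and wait on them - a small sketch, reusing the functions above:

from dask.distributed import wait

# keep the futures instead of fire-and-forget, then block until they have all completed
futures = []
for i in range(10):
    img = client.submit(load_eo_data, pure=False)
    img = client.submit(smooth, img)
    img = client.submit(np.fft.fft2, img)
    futures.append(client.submit(save, img, "file-" + str(i) + "-.dat"))

wait(futures)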
A dask.distributed.LocalCluster() will shut down when this notebook kernel is stopped.

Still, it's good practice to close the client and the cluster so it's all cleaned up. This will be more important when using dask distributed clusters as they are independent of the notebook kernel.
client.close()
cluster.close()