joblib/joblib/test/test_parallel.py at main · joblib/joblib

History

2250 lines (1780 loc) · 76.3 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

"""

Test the parallel module.

"""

# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>

# License: BSD Style, 3 clauses.

import mmap

import os

import re

import sys

import threading

import time

import warnings

import weakref

from contextlib import nullcontext

from math import sqrt

from multiprocessing import TimeoutError

from pickle import PicklingError

from time import sleep

from traceback import format_exception

import pytest

import joblib

from joblib import dump, load, parallel

from joblib._multiprocessing_helpers import mp

from joblib.test.common import (

IS_GIL_DISABLED,

np,

with_multiprocessing,

with_numpy,

)

from joblib.testing import check_subprocess_call, parametrize, raises, skipif, warns

if mp is not None:

# Loky is not available if multiprocessing is not

from joblib.externals.loky import get_reusable_executor

from queue import Queue

try:

import posix

except ImportError:

posix = None

try:

from ._openmp_test_helper.parallel_sum import parallel_sum

except ImportError:

parallel_sum = None

try:

import distributed

except ImportError:

distributed = None

from joblib._parallel_backends import (

LokyBackend,

MultiprocessingBackend,

ParallelBackendBase,

SequentialBackend,

ThreadingBackend,

)

from joblib.parallel import (

BACKENDS,

Parallel,

cpu_count,

delayed,

effective_n_jobs,

mp,

parallel_backend,

parallel_config,

register_parallel_backend,

)

# Copy of the backend registry without the "multiprocessing" entry; used below
# to parametrize the tests that exercise generator-style results.
RETURN_GENERATOR_BACKENDS = BACKENDS.copy()
RETURN_GENERATOR_BACKENDS.pop("multiprocessing", None)

# Every value accepted for ``backend=``: None, the registered names, ...
ALL_VALID_BACKENDS = [None] + sorted(BACKENDS.keys())
# Add instances of backend classes deriving from ParallelBackendBase
ALL_VALID_BACKENDS += [BACKENDS[backend_str]() for backend_str in BACKENDS]

# Process-based backends only exist when multiprocessing is importable.
if mp is None:
    PROCESS_BACKENDS = []
else:
    PROCESS_BACKENDS = ["multiprocessing", "loky"]
PARALLEL_BACKENDS = PROCESS_BACKENDS + ["threading"]

if hasattr(mp, "get_context"):
    # Custom multiprocessing context in Python 3.4+
    ALL_VALID_BACKENDS.append(mp.get_context("spawn"))

def get_default_backend_instance():
    """Return the ``BACKENDS`` registry entry of the current default backend."""
    # The default backend can be changed before running the tests through
    # JOBLIB_DEFAULT_PARALLEL_BACKEND environment variable so we need to use
    # parallel.DEFAULT_BACKEND here and not
    # from joblib.parallel import DEFAULT_BACKEND
    return BACKENDS[parallel.DEFAULT_BACKEND]

def get_workers(backend):
    """Return ``backend._pool`` if set, else ``backend._workers``, else None."""
    return getattr(backend, "_pool", getattr(backend, "_workers", None))

def division(x, y):
    """Return ``x / y`` (raises ZeroDivisionError when ``y`` is zero)."""
    return x / y

def square(x):
    """Return ``x`` squared."""
    return x**2

class MyExceptionWithFinickyInit(Exception):
    """An exception class with non trivial __init__"""

    def __init__(self, a, b, c, d):
        # Four required positional arguments, deliberately ignored.
        pass

def exception_raiser(x, custom_exception=False):
    """Return ``x`` unchanged, except that ``x == 7`` raises.

    Raises ``MyExceptionWithFinickyInit`` when ``custom_exception`` is true,
    ``ValueError`` otherwise.
    """
    if x == 7:
        if custom_exception:
            raise MyExceptionWithFinickyInit("a", "b", "c", "d")
        raise ValueError
    return x

def interrupt_raiser(x):
    """Sleep briefly, then raise KeyboardInterrupt; ``x`` is ignored."""
    time.sleep(0.05)
    raise KeyboardInterrupt

def f(x, y=0, z=0):
    """A module-level function so that it can be spawn with
    multiprocessing.

    Returns ``x**2 + y + z``.
    """
    return x**2 + y + z

def _active_backend_type():
    """Return the class of the currently active backend."""
    # get_active_backend() returns a sequence whose first item is the backend.
    return type(parallel.get_active_backend()[0])

def parallel_func(inner_n_jobs, backend):
    """Square 0, 1 and 2 through ``Parallel`` with the given settings."""
    return Parallel(n_jobs=inner_n_jobs, backend=backend)(
        delayed(square)(i) for i in range(3)
    )

###############################################################################

def test_cpu_count():
    """``cpu_count`` reports a strictly positive number."""
    assert cpu_count() > 0

def test_effective_n_jobs():
    """``effective_n_jobs`` with default arguments is strictly positive."""
    assert effective_n_jobs() > 0

@parametrize("context", [parallel_config, parallel_backend])
@pytest.mark.parametrize(
    "backend_n_jobs, expected_n_jobs",
    [(3, 3), (-1, effective_n_jobs(n_jobs=-1)), (None, 1)],
    ids=["positive-int", "negative-int", "None"],
)
@with_multiprocessing
def test_effective_n_jobs_None(context, backend_n_jobs, expected_n_jobs):
    """``effective_n_jobs(n_jobs=None)`` follows the active backend context."""
    # check the number of effective jobs when `n_jobs=None`
    # non-regression test for https://github.com/joblib/joblib/issues/984
    with context("threading", n_jobs=backend_n_jobs):
        # when using a backend, the default of number jobs will be the one set
        # in the backend
        assert effective_n_jobs(n_jobs=None) == expected_n_jobs
    # without any backend, None will default to a single job
    assert effective_n_jobs(n_jobs=None) == 1

###############################################################################

# Test parallel

@parametrize("backend", ALL_VALID_BACKENDS)
@parametrize("n_jobs", [1, 2, -1, -2])
@parametrize("verbose", [2, 11, 100])
def test_simple_parallel(backend, n_jobs, verbose):
    """``Parallel`` returns the same results as a plain list comprehension."""
    expected = [square(x) for x in range(5)]
    results = Parallel(n_jobs=n_jobs, backend=backend, verbose=verbose)(
        delayed(square)(x) for x in range(5)
    )
    assert expected == results

@parametrize("backend", ALL_VALID_BACKENDS)
@parametrize("n_jobs", [1, 2])
def test_parallel_pretty_print(backend, n_jobs):
    """All "Done ... out of ... |" progress prefixes have the same width."""
    n_tasks = 100
    pattern = re.compile(r"(Done\s+\d+ out of \d+ \|)")

    class ParallelLog(Parallel):
        # Collects every message instead of printing it.
        messages = []

        def _print(self, msg):
            self.messages.append(msg)

    executor = ParallelLog(n_jobs=n_jobs, backend=backend, verbose=10000)
    executor([delayed(f)(i) for i in range(n_tasks)])

    # Width of each matched prefix; aligned output yields a single width.
    lens = set()
    for message in executor.messages:
        if s := pattern.search(message):
            a, b = s.span()
            lens.add(b - a)
    assert len(lens) == 1

@parametrize("backend", ALL_VALID_BACKENDS)
def test_main_thread_renamed_no_warning(backend, monkeypatch):
    """Renaming the main thread must not make any backend emit a warning."""
    # Check that no default backend relies on the name of the main thread:
    # https://github.com/joblib/joblib/issues/180#issuecomment-253266247
    # Some programs use a different name for the main thread. This is the case
    # for uWSGI apps for instance.
    monkeypatch.setattr(
        target=threading.current_thread(),
        name="name",
        value="some_new_name_for_the_main_thread",
    )

    with warnings.catch_warnings(record=True) as warninfo:
        results = Parallel(n_jobs=2, backend=backend)(
            delayed(square)(x) for x in range(3)
        )
        assert results == [0, 1, 4]

    # Due to the default parameters of LokyBackend, there is a chance that
    # warninfo catches Warnings from worker timeouts. We remove it if it exists
    # We also remove DeprecationWarnings which could lead to false negatives.
    warninfo = [
        w
        for w in warninfo
        if "worker timeout" not in str(w.message)
        and not isinstance(w.message, DeprecationWarning)
    ]

    # Under Python 3.13 if backend='multiprocessing', you will get a
    # warning saying that forking a multi-threaded process is not a good idea,
    # we ignore them in this test
    if backend in [None, "multiprocessing"] or isinstance(
        backend, MultiprocessingBackend
    ):
        message_part = "multi-threaded, use of fork() may lead to deadlocks"
        warninfo = [w for w in warninfo if message_part not in str(w.message)]

    # The multiprocessing backend will raise a warning when detecting that is
    # started from the non-main thread. Let's check that there is no false
    # positive because of the name change.
    assert len(warninfo) == 0

def _assert_warning_nested(backend, inner_n_jobs, expected):
    """Run ``parallel_func`` and report whether its warnings match ``expected``.

    Returns True when the recorded warnings are as expected. When ``expected``
    is false, any recorded warning fails an assertion instead.
    """
    with warnings.catch_warnings(record=True) as warninfo:
        warnings.simplefilter("always")
        parallel_func(backend=backend, inner_n_jobs=inner_n_jobs)

    warninfo = [w.message for w in warninfo]
    if expected:
        if warninfo:
            warnings_are_correct = all(
                "backed parallel loops cannot" in each.args[0] for each in warninfo
            )
            # With free-threaded Python, when the outer backend is threading,
            # we might see more that one warning
            warnings_have_the_right_length = (
                len(warninfo) >= 1 if IS_GIL_DISABLED else len(warninfo) == 1
            )
            return warnings_are_correct and warnings_have_the_right_length

        return False
    else:
        assert not warninfo
        return True

@with_multiprocessing
@parametrize(
    "parent_backend,child_backend,expected",
    [
        ("loky", "multiprocessing", True),
        ("loky", "loky", False),
        ("multiprocessing", "multiprocessing", True),
        ("multiprocessing", "loky", True),
        ("threading", "multiprocessing", True),
        ("threading", "loky", True),
    ],
)
def test_nested_parallel_warnings(parent_backend, child_backend, expected):
    """Nested parallel calls warn only for the expected backend pairings."""
    # no warnings if inner_n_jobs=1
    Parallel(n_jobs=2, backend=parent_backend)(
        delayed(_assert_warning_nested)(
            backend=child_backend, inner_n_jobs=1, expected=False
        )
        for _ in range(5)
    )

    # warnings if inner_n_jobs != 1 and expected
    res = Parallel(n_jobs=2, backend=parent_backend)(
        delayed(_assert_warning_nested)(
            backend=child_backend, inner_n_jobs=2, expected=expected
        )
        for _ in range(5)
    )

    # warning handling is not thread safe. One thread might see multiple
    # warning or no warning at all.
    if parent_backend == "threading":
        assert any(res)
    else:
        assert all(res)

@with_multiprocessing
@parametrize("backend", ["loky", "multiprocessing", "threading"])
def test_background_thread_parallelism(backend):
    """``Parallel`` started from a non-main thread records no warning."""
    # NOTE(review): ``backend`` is not forwarded to Parallel below, so every
    # parametrization runs the default backend - confirm whether that is
    # intended before wiring it in.
    is_run_parallel = [False]

    def background_thread(is_run_parallel):
        with warnings.catch_warnings(record=True) as warninfo:
            Parallel(n_jobs=2)(delayed(sleep)(0.1) for _ in range(4))
        print(len(warninfo))
        # Report the outcome through the shared one-element list.
        is_run_parallel[0] = len(warninfo) == 0

    t = threading.Thread(target=background_thread, args=(is_run_parallel,))
    t.start()
    t.join()
    assert is_run_parallel[0]

def nested_loop(backend):
    """Run a two-task ``Parallel`` call on ``backend``; results are dropped."""
    Parallel(n_jobs=2, backend=backend)(delayed(square)(0.01) for _ in range(2))

@parametrize("child_backend", BACKENDS)
@parametrize("parent_backend", BACKENDS)
def test_nested_loop(parent_backend, child_backend):
    """Every parent/child backend pairing can run a nested parallel loop."""
    Parallel(n_jobs=2, backend=parent_backend)(
        delayed(nested_loop)(child_backend) for _ in range(2)
    )

def raise_exception(backend):
    """Unconditionally raise ValueError; ``backend`` is ignored."""
    raise ValueError

@with_multiprocessing
def test_nested_loop_with_exception_with_loky():
    """A ValueError raised by one loky task propagates to the caller."""
    with raises(ValueError):
        with Parallel(n_jobs=2, backend="loky") as parallel:
            parallel([delayed(nested_loop)("loky"), delayed(raise_exception)("loky")])

def test_mutate_input_with_threads():
    """Input is mutable when using the threading backend"""
    q = Queue(maxsize=5)
    # Five puts from worker threads must fill the caller's queue.
    Parallel(n_jobs=2, backend="threading")(delayed(q.put)(1) for _ in range(5))
    assert q.full()

@parametrize("n_jobs", [1, 2, 3])
def test_parallel_kwargs(n_jobs):
    """Check the keyword argument processing of pmap."""
    lst = range(10)
    expected = [f(x, y=1) for x in lst]
    assert expected == Parallel(n_jobs=n_jobs)(delayed(f)(x, y=1) for x in lst)

@parametrize("backend", PARALLEL_BACKENDS)
def test_parallel_as_context_manager(backend):
    """A ``Parallel`` used as context manager reuses, then releases, workers."""
    lst = range(10)
    expected = [f(x, y=1) for x in lst]

    with Parallel(n_jobs=4, backend=backend) as p:
        # Internally a pool instance has been eagerly created and is managed
        # via the context manager protocol
        managed_backend = p._backend

        # We make call with the managed parallel object several times inside
        # the managed block:
        assert expected == p(delayed(f)(x, y=1) for x in lst)
        assert expected == p(delayed(f)(x, y=1) for x in lst)

        # Those calls have all used the same pool instance:
        if mp is not None:
            assert get_workers(managed_backend) is get_workers(p._backend)

    # As soon as we exit the context manager block, the pool is terminated and
    # no longer referenced from the parallel object:
    if mp is not None:
        assert get_workers(p._backend) is None

    # It's still possible to use the parallel instance in non-managed mode:
    assert expected == p(delayed(f)(x, y=1) for x in lst)
    if mp is not None:
        assert get_workers(p._backend) is None

@with_multiprocessing
def test_parallel_pickling():
    """Check that pmap captures the errors when it is passed an object
    that cannot be pickled.
    """

    class UnpicklableObject(object):
        # Any attempt to pickle an instance fails in __reduce__.
        def __reduce__(self):
            raise RuntimeError("123")

    with raises(PicklingError, match=r"the task to send"):
        Parallel(n_jobs=2, backend="loky")(
            delayed(id)(UnpicklableObject()) for _ in range(10)
        )

@with_numpy
@with_multiprocessing
@parametrize("byteorder", ["<", ">", "="])
@parametrize("max_nbytes", [1, "1M"])
def test_parallel_byteorder_corruption(byteorder, max_nbytes):
    """Array byte order survives the round trip through loky workers."""

    def inspect_byteorder(x):
        return x, x.dtype.byteorder

    x = np.arange(6).reshape((2, 3)).view(f"{byteorder}i4")

    initial_np_byteorder = x.dtype.byteorder

    result = Parallel(n_jobs=2, backend="loky", max_nbytes=max_nbytes)(
        delayed(inspect_byteorder)(x) for _ in range(3)
    )

    # Byte order and contents must match both in the worker and on return.
    for x_returned, byteorder_in_worker in result:
        assert byteorder_in_worker == initial_np_byteorder
        assert byteorder_in_worker == x_returned.dtype.byteorder
        np.testing.assert_array_equal(x, x_returned)

@parametrize("backend", PARALLEL_BACKENDS)
def test_parallel_timeout_success(backend):
    """Fast tasks complete within a generous timeout."""
    # Check that timeout isn't thrown when function is fast enough
    assert (
        len(
            Parallel(n_jobs=2, backend=backend, timeout=30)(
                delayed(sleep)(0.001) for x in range(10)
            )
        )
        == 10
    )

@with_multiprocessing
@parametrize("backend", PARALLEL_BACKENDS)
def test_parallel_timeout_fail(backend):
    """Slow tasks with a tiny timeout raise TimeoutError."""
    # Check that timeout properly fails when function is too slow
    with raises(TimeoutError):
        Parallel(n_jobs=2, backend=backend, timeout=0.01)(
            delayed(sleep)(10) for x in range(10)
        )

@with_multiprocessing
@parametrize("backend", set(RETURN_GENERATOR_BACKENDS) - {"sequential"})
@parametrize("return_as", ["generator", "generator_unordered"])
def test_parallel_timeout_fail_with_generator(backend, return_as):
    """Timeouts also apply when results are consumed as a generator."""
    # Check that timeout properly fails when function is too slow with
    # return_as=generator
    with raises(TimeoutError):
        list(
            Parallel(n_jobs=2, backend=backend, return_as=return_as, timeout=0.1)(
                delayed(sleep)(10) for x in range(10)
            )
        )

    # Fast tasks and high timeout should not raise
    list(
        Parallel(n_jobs=2, backend=backend, return_as=return_as, timeout=10)(
            delayed(sleep)(0.01) for x in range(10)
        )
    )

@with_multiprocessing
@parametrize("backend", PROCESS_BACKENDS)
def test_error_capture(backend):
    """Worker errors surface with their original type and workers restart."""
    # Check that error are captured, and that correct exceptions
    # are raised.
    if mp is not None:
        with raises(ZeroDivisionError):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))]
            )

        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2, backend=backend)(
                [delayed(interrupt_raiser)(x) for x in (1, 0)]
            )

        # Try again with the context manager API
        with Parallel(n_jobs=2, backend=backend) as parallel:
            assert get_workers(parallel._backend) is not None
            original_workers = get_workers(parallel._backend)

            with raises(ZeroDivisionError):
                parallel([delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

            # The managed pool should still be available and be in a working
            # state despite the previously raised (and caught) exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            assert [f(x, y=1) for x in range(10)] == parallel(
                delayed(f)(x, y=1) for x in range(10)
            )

            original_workers = get_workers(parallel._backend)
            with raises(KeyboardInterrupt):
                parallel([delayed(interrupt_raiser)(x) for x in (1, 0)])

            # The pool should still be available despite the exception
            assert get_workers(parallel._backend) is not None

            # The pool should have been interrupted and restarted:
            assert get_workers(parallel._backend) is not original_workers

            # The tuple is the assertion message: internal state for debugging.
            assert [f(x, y=1) for x in range(10)] == parallel(
                delayed(f)(x, y=1) for x in range(10)
            ), (
                parallel._iterating,
                parallel.n_completed_tasks,
                parallel.n_dispatched_tasks,
                parallel._aborting,
            )

        # Check that the inner pool has been terminated when exiting the
        # context manager
        assert get_workers(parallel._backend) is None
    else:
        with raises(KeyboardInterrupt):
            Parallel(n_jobs=2)([delayed(interrupt_raiser)(x) for x in (1, 0)])

    # wrapped exceptions should inherit from the class of the original
    # exception to make it easy to catch them
    with raises(ZeroDivisionError):
        Parallel(n_jobs=2)([delayed(division)(x, y) for x, y in zip((0, 1), (1, 0))])

    with raises(MyExceptionWithFinickyInit):
        Parallel(n_jobs=2, verbose=0)(
            (delayed(exception_raiser)(i, custom_exception=True) for i in range(30))
        )

@with_multiprocessing
@parametrize("backend", BACKENDS)
def test_error_in_task_iterator(backend):
    """An error raised by the task iterator propagates at any stage."""

    def my_generator(raise_at=0):
        # Yield 0..19, raising just before yielding ``raise_at``.
        for i in range(20):
            if i == raise_at:
                raise ValueError("Iterator Raising Error")
            yield i

    with Parallel(n_jobs=2, backend=backend) as p:
        # The error is raised in the pre-dispatch phase
        with raises(ValueError, match="Iterator Raising Error"):
            p(delayed(square)(i) for i in my_generator(raise_at=0))

        # The error is raised when dispatching a new task after the
        # pre-dispatch (likely to happen in a different thread)
        with raises(ValueError, match="Iterator Raising Error"):
            p(delayed(square)(i) for i in my_generator(raise_at=5))

        # Same, but raises long after the pre-dispatch phase
        with raises(ValueError, match="Iterator Raising Error"):
            p(delayed(square)(i) for i in my_generator(raise_at=19))

def consumer(queue, item):
    """Append the marker ``"Consumed <item>"`` to ``queue``."""
    queue.append("Consumed %s" % item)

@parametrize("backend", BACKENDS)
@parametrize(
    "batch_size, expected_queue",
    [
        (
            # One task per batch: production and consumption interleave.
            1,
            [
                "Produced 0",
                "Consumed 0",
                "Produced 1",
                "Consumed 1",
                "Produced 2",
                "Consumed 2",
                "Produced 3",
                "Consumed 3",
                "Produced 4",
                "Consumed 4",
                "Produced 5",
                "Consumed 5",
            ],
        ),
        (
            # Four tasks per batch: a full batch is produced, then consumed.
            4,
            [  # First Batch
                "Produced 0",
                "Produced 1",
                "Produced 2",
                "Produced 3",
                "Consumed 0",
                "Consumed 1",
                "Consumed 2",
                "Consumed 3",
                # Second batch
                "Produced 4",
                "Produced 5",
                "Consumed 4",
                "Consumed 5",
            ],
        ),
    ],
)
def test_dispatch_one_job(backend, batch_size, expected_queue):
    """Test that with only one job, Parallel does act as a iterator."""
    queue = list()

    def producer():
        for i in range(6):
            queue.append("Produced %i" % i)
            yield i

    Parallel(n_jobs=1, batch_size=batch_size, backend=backend)(
        delayed(consumer)(queue, x) for x in producer()
    )
    assert queue == expected_queue
    assert len(queue) == 12

@with_multiprocessing
@parametrize("backend", PARALLEL_BACKENDS)
def test_dispatch_multiprocessing(backend):
    """Check that using pre_dispatch Parallel does indeed dispatch items
    lazily.
    """
    # A manager-backed list is shared with the workers.
    manager = mp.Manager()
    queue = manager.list()

    def producer():
        for i in range(6):
            queue.append("Produced %i" % i)
            yield i

    Parallel(n_jobs=2, batch_size=1, pre_dispatch=3, backend=backend)(
        delayed(consumer)(queue, "any") for _ in producer()
    )

    queue_contents = list(queue)
    assert queue_contents[0] == "Produced 0"

    # Only 3 tasks are pre-dispatched out of 6. The 4th task is dispatched only
    # after any of the first 3 jobs have completed.
    first_consumption_index = queue_contents[:4].index("Consumed any")
    assert first_consumption_index > -1

    produced_3_index = queue_contents.index("Produced 3")  # 4th task produced
    assert produced_3_index > first_consumption_index

    assert len(queue) == 12

def test_batching_auto_threading():
    """``batch_size="auto"`` keeps a batch size of 1 on the threading backend."""
    # batching='auto' with the threading backend leaves the effective batch
    # size to 1 (no batching) as it has been found to never be beneficial with
    # this low-overhead backend.

    with Parallel(n_jobs=2, batch_size="auto", backend="threading") as p:
        p(delayed(id)(i) for i in range(5000))  # many very fast tasks
        assert p._backend.compute_batch_size() == 1

@with_multiprocessing
@parametrize("backend", PROCESS_BACKENDS)
def test_batching_auto_subprocesses(backend):
    """``batch_size="auto"`` yields a positive batch size on process backends."""
    with Parallel(n_jobs=2, batch_size="auto", backend=backend) as p:
        p(delayed(id)(i) for i in range(5000))  # many very fast tasks

        # It should be strictly larger than 1 but as we don't want heisen
        # failures on clogged CI worker environment be safe and only check that
        # it's a strictly positive number.
        assert p._backend.compute_batch_size() > 0

def test_exception_dispatch():
    """Make sure that exception raised during dispatch are indeed captured"""
    with raises(ValueError):
        Parallel(n_jobs=2, pre_dispatch=16, verbose=0)(
            delayed(exception_raiser)(i) for i in range(30)
        )

def nested_function_inner(i):
    """Run ``exception_raiser`` over 0..29 in parallel; ``i`` is ignored."""
    Parallel(n_jobs=2)(delayed(exception_raiser)(j) for j in range(30))

def nested_function_outer(i):
    """Run ``nested_function_inner`` over 0..29 in parallel; ``i`` is ignored."""
    Parallel(n_jobs=2)(delayed(nested_function_inner)(j) for j in range(30))

@with_multiprocessing
@parametrize("backend", PARALLEL_BACKENDS)
@pytest.mark.xfail(reason="https://github.com/joblib/loky/pull/255")
def test_nested_exception_dispatch(backend):
    """Ensure errors for nested joblib cases gets propagated

    We rely on the Python 3 built-in __cause__ system that already
    report this kind of information to the user.
    """
    with raises(ValueError) as excinfo:
        Parallel(n_jobs=2, backend=backend)(
            delayed(nested_function_outer)(i) for i in range(30)
        )

    # Check that important information such as function names are visible
    # in the final error message reported to the user
    report_lines = format_exception(excinfo.type, excinfo.value, excinfo.tb)
    report = "".join(report_lines)
    assert "nested_function_outer" in report
    assert "nested_function_inner" in report
    assert "exception_raiser" in report

    assert type(excinfo.value) is ValueError

class FakeParallelBackend(SequentialBackend):
    """Pretends to run concurrently while running sequentially."""

    def configure(self, n_jobs=1, parallel=None, **backend_args):
        """Record the resolved job count and owner; return ``n_jobs`` as given."""
        self.n_jobs = self.effective_n_jobs(n_jobs)
        self.parallel = parallel
        return n_jobs

    def effective_n_jobs(self, n_jobs=1):
        """Resolve a negative ``n_jobs`` against the CPU count (at least 1)."""
        if n_jobs < 0:
            n_jobs = max(mp.cpu_count() + 1 + n_jobs, 1)
        return n_jobs

def test_invalid_backend():
    """An unknown backend name is rejected by every entry point."""
    with raises(ValueError, match="Invalid backend:"):
        Parallel(backend="unit-testing")

    with raises(ValueError, match="Invalid backend:"):
        with parallel_config(backend="unit-testing"):
            pass

    # Also cover the parallel_backend context manager, which the other tests
    # in this file exercise alongside parallel_config.
    with raises(ValueError, match="Invalid backend:"):
        with parallel_backend(backend="unit-testing"):
            pass

@parametrize("backend", ALL_VALID_BACKENDS)
def test_invalid_njobs(backend):
    """Zero-like or unparsable ``n_jobs`` values raise a clear ValueError."""
    with raises(ValueError) as excinfo:
        Parallel(n_jobs=0, backend=backend)._initialize_backend()
    assert "n_jobs == 0 in Parallel has no meaning" in str(excinfo.value)

    with raises(ValueError) as excinfo:
        Parallel(n_jobs=0.5, backend=backend)._initialize_backend()
    assert "n_jobs == 0 in Parallel has no meaning" in str(excinfo.value)

    with raises(ValueError) as excinfo:
        Parallel(n_jobs="2.3", backend=backend)._initialize_backend()
    assert "n_jobs could not be converted to int" in str(excinfo.value)

    with raises(ValueError) as excinfo:
        Parallel(n_jobs="invalid_str", backend=backend)._initialize_backend()
    assert "n_jobs could not be converted to int" in str(excinfo.value)

@with_multiprocessing
@parametrize("backend", PARALLEL_BACKENDS)
@parametrize("n_jobs", ["2", 2.3, 2])
def test_njobs_converted_to_int(backend, n_jobs):
    """String and float ``n_jobs`` values resolve to the integer 2."""
    p = Parallel(n_jobs=n_jobs, backend=backend)
    assert p._effective_n_jobs() == 2

    res = p(delayed(square)(i) for i in range(10))
    assert all(r == square(i) for i, r in enumerate(res))

def test_register_parallel_backend():
    """``register_parallel_backend`` adds the factory to ``BACKENDS``."""
    try:
        register_parallel_backend("test_backend", FakeParallelBackend)
        assert "test_backend" in BACKENDS
        assert BACKENDS["test_backend"] == FakeParallelBackend
    finally:
        # pop() rather than del: if registration itself failed, a KeyError
        # raised here would hide the original error.
        BACKENDS.pop("test_backend", None)

def test_overwrite_default_backend():
    """``make_default=True`` switches the active backend type."""
    default_backend_orig = parallel.DEFAULT_BACKEND
    assert _active_backend_type() == get_default_backend_instance()
    try:
        register_parallel_backend("threading", BACKENDS["threading"], make_default=True)
        assert _active_backend_type() == ThreadingBackend
    finally:
        # Restore the global default manually
        parallel.DEFAULT_BACKEND = default_backend_orig
    assert _active_backend_type() == get_default_backend_instance()

@skipif(mp is not None, reason="Only without multiprocessing")
def test_backend_no_multiprocessing():
    """Without multiprocessing, requesting "loky" warns but still runs."""
    with warns(UserWarning, match="joblib backend '.*' is not available on.*"):
        Parallel(backend="loky")(delayed(square)(i) for i in range(3))

    # The below should now work without problems
    with parallel_config(backend="loky"):
        Parallel()(delayed(square)(i) for i in range(3))

def check_backend_context_manager(context, backend_name):
    """Assert that ``context(backend_name, n_jobs=3)`` activates that backend.

    Checks the active backend, its ``n_jobs`` and the backend picked up by a
    fresh ``Parallel()`` created inside the context.
    """
    with context(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert active_n_jobs == 3
        assert effective_n_jobs(3) == 3
        p = Parallel()
        assert p.n_jobs == 3
        if backend_name == "multiprocessing":
            assert type(active_backend) is MultiprocessingBackend
            assert type(p._backend) is MultiprocessingBackend
        elif backend_name == "loky":
            assert type(active_backend) is LokyBackend
            assert type(p._backend) is LokyBackend
        elif backend_name == "threading":
            assert type(active_backend) is ThreadingBackend
            assert type(p._backend) is ThreadingBackend
        elif backend_name.startswith("test_"):
            assert type(active_backend) is FakeParallelBackend
            assert type(p._backend) is FakeParallelBackend

# The parallel backends plus three placeholder names for fake test backends.
all_backends_for_context_manager = list(PARALLEL_BACKENDS)
all_backends_for_context_manager += [f"test_backend_{i}" for i in range(3)]

@with_multiprocessing
@parametrize("backend", all_backends_for_context_manager)
@parametrize("context", [parallel_backend, parallel_config])
def test_backend_context_manager(monkeypatch, backend, context):
    """Backend context managers switch the backend and restore the default."""
    if backend not in BACKENDS:
        # Placeholder names are registered only for the duration of the test.
        monkeypatch.setitem(BACKENDS, backend, FakeParallelBackend)

    assert _active_backend_type() == get_default_backend_instance()
    # check that this possible to switch parallel backends sequentially
    check_backend_context_manager(context, backend)

    # The default backend is restored
    assert _active_backend_type() == get_default_backend_instance()

    # Check that context manager switching is thread safe:
    # NOTE(review): every entry of all_backends_for_context_manager is a
    # non-empty string, so ``if not b`` appears to filter out every task -
    # confirm intent before relying on this check.
    Parallel(n_jobs=2, backend="threading")(
        delayed(check_backend_context_manager)(context, b)
        for b in all_backends_for_context_manager
        if not b
    )

    # The default backend is again restored
    assert _active_backend_type() == get_default_backend_instance()

class ParameterizedParallelBackend(SequentialBackend):
    """Pretends to run concurrently while running sequentially."""

    def __init__(self, param=None):
        """Store ``param``; raise ValueError when it is None."""
        if param is None:
            raise ValueError("param should not be None")
        self.param = param

@parametrize("context", [parallel_config, parallel_backend])
def test_parameterized_backend_context_manager(monkeypatch, context):
    """Extra keyword arguments of the context reach the backend constructor."""
    monkeypatch.setitem(BACKENDS, "param_backend", ParameterizedParallelBackend)
    assert _active_backend_type() == get_default_backend_instance()

    with context("param_backend", param=42, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) is ParameterizedParallelBackend
        assert active_backend.param == 42
        assert active_n_jobs == 3
        p = Parallel()
        assert p.n_jobs == 3
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == get_default_backend_instance()

@parametrize("context", [parallel_config, parallel_backend])
def test_directly_parameterized_backend_context_manager(context):
    """A backend instance can be passed to the context without registration."""
    assert _active_backend_type() == get_default_backend_instance()

    # Check that it's possible to pass a backend instance directly,
    # without registration
    with context(ParameterizedParallelBackend(param=43), n_jobs=5):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) is ParameterizedParallelBackend
        assert active_backend.param == 43
        assert active_n_jobs == 5
        p = Parallel()
        assert p.n_jobs == 5
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == get_default_backend_instance()

def sleep_and_return_pid():
    """Sleep for 0.1 s, then return the id of the current process."""
    sleep(0.1)
    return os.getpid()

def get_nested_pids():
    """Return the pids seen by two nested tasks, after checking the backend."""
    assert _active_backend_type() == ThreadingBackend
    # Assert that the nested backend does not change the default number of
    # jobs used in Parallel
    assert Parallel()._effective_n_jobs() == 1

    # Assert that the tasks are running only on one process
    return Parallel(n_jobs=2)(delayed(sleep_and_return_pid)() for _ in range(2))

class MyBackend(joblib._parallel_backends.LokyBackend):
    """Backend to test backward compatibility with older backends"""

    def get_nested_backend(
        self,
    ):
        """Return only the first item of the parent's nested-backend result."""
        # Older backends only return a backend, without n_jobs indications.
        return super(MyBackend, self).get_nested_backend()[0]

register_parallel_backend("back_compat_backend", MyBackend)

@with_multiprocessing
@parametrize("backend", ["threading", "loky", "multiprocessing", "back_compat_backend"])
@parametrize("context", [parallel_config, parallel_backend])
def test_nested_backend_context_manager(context, backend):
    """Nested calls under each backend run their tasks in a single process."""
    # Check that by default, nested parallel calls will always use the
    # ThreadingBackend

    with context(backend):
        pid_groups = Parallel(n_jobs=2)(delayed(get_nested_pids)() for _ in range(10))
        # Each group holds the pids seen by the nested tasks of one outer task.
        for pid_group in pid_groups:
            assert len(set(pid_group)) == 1

@with_multiprocessing
@parametrize("n_jobs", [2, -1, None])
@parametrize("backend", PARALLEL_BACKENDS)
@parametrize("context", [parallel_config, parallel_backend])
def test_nested_backend_in_sequential(backend, n_jobs, context):
    """A sequential outer call leaves the backend and n_jobs of nested calls."""
    # Check that by default, nested parallel calls will always use the
    # ThreadingBackend

    def check_nested_backend(expected_backend_type, expected_n_job):
        # Assert that the sequential backend at top level, does not change the
        # backend for nested calls.
        assert _active_backend_type() == BACKENDS[expected_backend_type]

        # Assert that the nested backend in SequentialBackend does not change
        # the default number of jobs used in Parallel
        expected_n_job = effective_n_jobs(expected_n_job)
        assert Parallel()._effective_n_jobs() == expected_n_job

    Parallel(n_jobs=1)(
        delayed(check_nested_backend)(parallel.DEFAULT_BACKEND, 1) for _ in range(10)
    )

    with context(backend, n_jobs=n_jobs):
        Parallel(n_jobs=1)(
            delayed(check_nested_backend)(backend, n_jobs) for _ in range(10)
        )

def check_nesting_level(context, inner_backend, expected_level):

with context(inner_backend) as ctx:

if context is parallel_config:

backend = ctx["backend"]

if context is parallel_backend:

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

test_parallel.py

Latest commit

History

test_parallel.py

File metadata and controls