Skip to content

Fork start method is susceptible to deadlocks #2245

@Louis-Tian

Description

@Louis-Tian

Versions

Ubuntu 16.04
Python 3.6.2 
pytorch 0.1.12_2

Issue description

import torch
import torch.multiprocessing as mp
import torch.functional as f
import threading
import numpy as np
from timeit import timeit

def build(cuda=False):
    """Create the two-layer linear network used by the reproducer.

    Args:
        cuda: When true, the model is returned after calling ``.cuda()`` on it.

    Returns:
        A ``torch.nn.Sequential`` of ``Linear(1024, 1024)`` followed by
        ``Linear(1024, 1)``.
    """
    hidden = torch.nn.Linear(1024, 1024)
    head = torch.nn.Linear(1024, 1)
    model = torch.nn.Sequential(hidden, head)

    if cuda:
        return model.cuda()
    return model

def train(nn, X, y, epoch=100):
    """Fit ``nn`` to ``(X, y)`` with plain SGD on a mean-squared-error loss.

    Args:
        nn: Module whose ``parameters()`` are updated in place.
        X: Input tensor fed to ``nn``.
        y: Target tensor compared against ``nn(X)``.
        epoch: Number of full-batch gradient steps to take.
    """
    X = torch.autograd.Variable(X)
    y = torch.autograd.Variable(y)
    optim = torch.optim.SGD(nn.parameters(), lr=0.1)
    for _ in range(epoch):
        # backward() accumulates into .grad; clear it so each step uses only
        # the gradient of the current loss rather than the running sum.
        optim.zero_grad()
        yhat = nn(X)
        loss = ((yhat - y) ** 2).mean()
        loss.backward()
        optim.step()

def data(cuda=False):
    """Generate a random regression batch of 10 samples.

    Args:
        cuda: When true, both tensors are returned after calling ``.cuda()``.

    Returns:
        A ``(X, y)`` tuple: ``X`` built from a ``(10, 1024)`` normal sample and
        ``y`` from a ``(10, 1)`` normal sample.
    """
    # Draw the inputs before the targets so the RNG is consumed in a fixed order.
    features = torch.Tensor(np.random.randn(10, 1024))
    targets = torch.Tensor(np.random.randn(10, 1))
    if not cuda:
        return features, targets
    return features.cuda(), targets.cuda()

def cpu_run(i=None):
    """Build a CPU model, generate a CPU batch, and train on it once.

    Args:
        i: Unused; present so the function can be called with one argument.
    """
    model = build(cuda=False)
    inputs, targets = data(cuda=False)
    train(model, inputs, targets)

def seq_cpu_run():
    """Call ``cpu_run`` five times in a row in the current process."""
    remaining = 5
    while remaining > 0:
        cpu_run()
        remaining -= 1

def multiprocess_cpu_run():
    """Run ``cpu_run`` once inside a single-worker process pool.

    Returns:
        The list produced by ``pool.map`` (one entry per submitted task).
    """
    pool = torch.multiprocessing.Pool(processes=1)
    try:
        # A single task; its argument (an empty tuple) is not used by cpu_run.
        result = pool.map(cpu_run, [()])
    finally:
        # Shut the pool down even if map() raises, so the worker is not leaked.
        pool.close()
        pool.join()
    return result

if __name__ == "__main__":
    # Case #1 (seq_cpu_run) is timed first, then case #2 (multiprocess_cpu_run).
    for case in (seq_cpu_run, multiprocess_cpu_run):
        print(timeit(case, number=1))

#1 runs okay alone.
#2 runs okay alone.
#2 then #1 runs okay.
#1 then #2 never terminates.

where
#1 = seq_cpu_run, #2 = multiprocess_cpu_run

Metadata

Metadata

Assignees

No one assigned

    Labels

    feature: A request for a proper, new feature. | module: multiprocessing: Related to torch.multiprocessing | module: multithreading: Related to issues that occur when running on multiple CPU threads | todo: Not as important as medium or high priority tasks, but we will work on these. | triaged: This issue has been looked at by a team member, and triaged and prioritized into an appropriate module

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions