Skip to content

Make unzip use iter_suppress? #1055

@pochmann3

Description

@pochmann3

Currently it uses its own special itemgetter version:

def unzip(iterable):
    # Current implementation; the prologue that defines ``head`` is elided.

    ...
    # len(head) independent iterators over the same input.
    iterables = tee(iterable, len(head))

    # Local stand-in for operator.itemgetter that turns a failed index
    # lookup into the end of iteration.
    def itemgetter(i):
        def getter(obj):
            try:
                return obj[i]
            except IndexError:
                # Raised from the function given to map(), this ends that
                # map iterator instead of surfacing the IndexError.
                raise StopIteration
        return getter

    # One lazy map per position i, each reading from its own tee copy.
    return tuple(map(itemgetter(i), it) for i, it in enumerate(iterables))

I suggest using the standard itemgetter (from operator) together with iter_suppress (which didn't exist when unzip was added):

def unzip(iterable):
    # Proposed implementation; the prologue that defines ``head`` is elided.

    ...
    # len(head) independent iterators over the same input.
    iterables = tee(iterable, len(head))

    # itemgetter here is operator.itemgetter; iter_suppress ends the
    # iterator quietly when the lookup raises IndexError.
    return tuple(
        iter_suppress(map(itemgetter(i), it), IndexError)
        for i, it in enumerate(iterables)
    )

It's also faster in my testing with iterable = [(0,) * 100] * 100 (the times are for calling unzip and consuming all returned iterators):

484.8 ± 0.6 μs  unzip_loop
547.7 ± 1.1 μs  unzip_suppress
815.6 ± 2.2 μs  unzip_current

Python: 3.13.0 (main, Nov  9 2024, 10:04:25) [GCC 14.2.1 20240910]

The "loop" one is another alternative:

def unzip_loop(iterable):
    # Generator-based alternative; the prologue that defines ``head`` is
    # elided.

    ...
    # len(head) independent iterators over the same input.
    iterables = tee(iterable, len(head))

    # Generator yielding item i of every object from ``it``; the try wraps
    # the whole loop, so the first IndexError finishes the generator.
    def itemgetter(i, it):
        try:
            for obj in it:
                yield obj[i]
        except IndexError:
            return

    # count() supplies 0, 1, 2, ... alongside the tee copies; map stops when
    # the copies run out.
    return tuple(map(itemgetter, count(), iterables))
Benchmark script
def unzip_current(iterable):
    """Baseline: split an iterable of sequences into one iterator per position.

    The number of returned iterators equals the length of the first element.
    An empty input yields an empty tuple.  Each returned iterator ends at the
    first element that is too short to be indexed at its position.
    """
    peeked, iterable = spy(iterable)
    if not peeked:
        # empty iterable, e.g. zip([], [], [])
        return ()
    # spy returns a list holding just the first element
    first = peeked[0]
    copies = tee(iterable, len(first))

    def make_getter(index):
        # Raising StopIteration from the function passed to map() ends that
        # map iterator, so a too-short element terminates its column.
        def pick(element):
            try:
                return element[index]
            except IndexError:
                raise StopIteration
        return pick

    return tuple(
        map(make_getter(index), copy) for index, copy in enumerate(copies)
    )


def unzip_suppress(iterable):
    """Proposed variant: ``operator.itemgetter`` wrapped in ``iter_suppress``.

    Same contract as ``unzip_current``: one iterator per position of the
    first element, an empty tuple for empty input, and each iterator ends at
    the first element too short for its position.
    """
    peeked, iterable = spy(iterable)
    if not peeked:
        # empty iterable, e.g. zip([], [], [])
        return ()
    # spy returns a list holding just the first element
    first = peeked[0]
    copies = tee(iterable, len(first))

    columns = []
    for index, copy in enumerate(copies):
        getter = itemgetter(index)
        # IndexError from the getter ends this column instead of propagating.
        columns.append(iter_suppress(map(getter, copy), IndexError))
    return tuple(columns)


def unzip_loop(iterable):
    """Generator-based variant: one explicit loop per position.

    Same contract as ``unzip_current``: one iterator per position of the
    first element, an empty tuple for empty input, and each iterator ends at
    the first element too short for its position.
    """
    peeked, iterable = spy(iterable)
    if not peeked:
        # empty iterable, e.g. zip([], [], [])
        return ()
    # spy returns a list holding just the first element
    first = peeked[0]
    copies = tee(iterable, len(first))

    def column(index, source):
        # A single try around the whole loop: the first IndexError simply
        # finishes the generator.
        try:
            for element in source:
                yield element[index]
        except IndexError:
            pass

    return tuple(column(index, copy) for index, copy in enumerate(copies))


# Implementations under test. The benchmark loop below shuffles this list in
# place before every round.
funcs = [
    unzip_current,
    unzip_suppress,
    unzip_loop,
]


from itertools import *
from operator import itemgetter
from timeit import timeit
from statistics import mean, stdev
from collections import deque
import sys
import random

def spy(iterable, n=1):
    """Peek at the first *n* items of *iterable* without losing them.

    Returns ``(head, iterator)``: ``head`` is a list of up to *n* leading
    items, and ``iterator`` yields every item of the input, including the
    ones in ``head``.
    """
    untouched, lookahead = tee(iterable)
    head = take(n, lookahead)
    return head, untouched

def take(n, iterable):
    """Return the first *n* items of *iterable* as a list.

    The list is shorter than *n* when the iterable runs out early.
    """
    leading = islice(iterable, n)
    return list(leading)

def iter_suppress(iterable, *exceptions):
    """Yield the items of *iterable*, ending quietly on a listed exception.

    If iterating raises one of *exceptions*, the generator finishes instead
    of propagating it; any other exception passes through unchanged.
    """
    try:
        yield from iterable
    except exceptions:
        # Treat the listed exceptions as a normal end of iteration.
        pass

# Exhaust an iterable by feeding it to a bounded deque, which retains at most
# the last item.
consume = deque(maxlen=1).extend

# Correctness: rectangular input (every tuple has two items), unpacked into
# two columns.
for f in funcs:
    iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    letters, numbers = f(iterable)
    print(list(letters), list(numbers))

# Correctness: ragged input supplied as a one-shot iterator; each column
# should stop at the first tuple too short to have it.
for f in funcs:
    iterable = iter([(1, 2, 3), (4, 5), (6,)])
    print(*map(list, f(iterable)))

# Speed
# 100 rows of 100 items each; every run calls the function and fully consumes
# all of the iterators it returns.
iterable = [(0,) * 100] * 100
times = {f: [] for f in funcs}


def best_times(f):
    """Return the five fastest recorded timings of *f*, in microseconds."""
    return [t * 1e6 for t in sorted(times[f])[:5]]


def stats(f):
    """Format mean ± standard deviation of the five fastest timings of *f*."""
    ts = best_times(f)
    return f'{mean(ts):5.1f} ± {stdev(ts):3.1f} μs '


for _ in range(100):
    # Shuffle each round so no function always runs in the same position.
    random.shuffle(funcs)
    for f in funcs:
        t = timeit(lambda: consume(map(consume, f(iterable))), number=1)
        times[f].append(t)

# Rank on the numeric mean rather than on the formatted string: string
# comparison misorders rows once a mean needs more digits than the field
# width (for example '1000.0' sorts before '999.9').
for f in sorted(funcs, key=lambda f: mean(best_times(f))):
    print(stats(f), f.__name__)

print('\nPython:', sys.version)

Attempt This Online!

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions