-
Notifications
You must be signed in to change notification settings - Fork 311
Improve mark_ends #1034
Copy link
Copy link
Closed
Description
The current implementation:
def mark_ends(iterable):
it = iter(iterable)
try:
b = next(it)
except StopIteration:
return
try:
for i in count():
a = b
b = next(it)
yield i == 0, False, a
except StopIteration:
yield i == 0, True, aIt's a bit odd and slow. Instead of looping over count() and picking from the input iterator with next() in a try, I'd loop over the input iterator. And instead of producing ints only to compare them to zero, I'd use a bool flag:
def mark_ends__improved(iterable):
it = iter(iterable)
try:
a = next(it)
first = True
except StopIteration:
return
for b in it:
yield first, False, a
a = b
first = False
yield first, True, aOr with "my style" of preferring for statements:
def mark_ends__improved_mystyle(iterable):
it = iter(iterable)
for a in it:
first = True
for b in it:
yield first, False, a
a = b
first = False
yield first, True, aThese are almost the fastest of all my solutions. I also have some shorter ones (including a two-liner). All solutions and the benchmark code are at the end.
Script with all solutions and benchmark
def mark_ends__shortest(iterable):
for i, (a, b) in enumerate(pairwise(chain(iterable, [_marker]))):
yield not i, b is _marker, a
def mark_ends__flag_pairs(iterable):
first = True
for a, b in pairwise(chain(iterable, [_marker])):
yield first, b is _marker, a
first = False
def mark_ends__improved(iterable):
it = iter(iterable)
try:
a = next(it)
first = True
except StopIteration:
return
for b in it:
yield first, False, a
a = b
first = False
yield first, True, a
def mark_ends__impr_mystyle(iterable):
it = iter(iterable)
for a in it:
first = True
for b in it:
yield first, False, a
a = b
first = False
yield first, True, a
# Add an extra step to handle the first item, then
# we don't need a first flag and do as little as
# possible other than fast iterating and yielding.
def mark_ends__fastest_python(iterable):
it = iter(iterable)
for a in it:
for b in it:
yield (True, False, a)
for c in it:
yield (False, False, b)
b = c
yield (False, True, b)
return
yield (True, True, a)
return
# Same as above, but with word variables.
def mark_ends__fastest_python_words(iterable):
iterator = iter(iterable)
for first in iterator:
for second in iterator:
yield (True, False, first)
hold = second
for another in iterator:
yield (False, False, hold)
hold = another
yield (False, True, hold)
return
yield (True, True, first)
return
# Single loop, use index for all decisions.
def mark_ends__enumerate(iterable):
i = -1
for i, b in enumerate(iterable):
if i:
yield i == 1, False, a
a = b
if i >= 0:
yield i == 0, True, a
# Just append the marker to know we're at the end.
def mark_ends__marker_delay(iterable):
it = chain(iterable, [_marker])
for a in it:
first = True
for b in it:
yield first, b is _marker, a
a = b
first = False
# One loop, use flag to distinguish the three states:
# - None: no previous item a
# - True: previous item a was first
# - False: previous item a was not first
def mark_ends__tristate(iterable):
first = None
for b in iterable:
if first is None:
first = True
else:
yield first, False, a
first = False
a = b
if first is not None:
yield first, True, a
def mark_ends__tristate_2(iterable):
first = None
for b in iterable:
if first is not None:
yield first, False, a
a = b
first = first is None
if first is not None:
yield first, True, a
# Almost only tools. One small generator.
def mark_ends__toolsish(iterable):
items, short = tee(iterable)
next(short, None)
return zip(
chain([True], repeat(False)),
chain((False for _ in short), [True]),
items
)
# Optimized version of the above.
def mark_ends__fastest_tools(iterable):
items, short = tee(iterable)
for _ in short:
return zip_longest(
(True,),
chain(compress(repeat(False), zip(short)), (True,)),
items,
fillvalue=False
)
return iter(())
# Replicating the "fastest_python" solution with tools.
def mark_ends__tools_replica(iterable):
it = iter(iterable)
for a in it:
b, c, b2 = tee(it, 3)
for _ in c:
return chain(
((True, False, a),),
zip_longest((), (), compress(b, zip(c, b2)), fillvalue=False),
zip((False,), (True,), b2)
)
return iter(((True, True, a),))
return iter(())
# Same as above, but with word variables.
def mark_ends__tools_replica_words(iterable):
it = iter(iterable)
for first in it:
middle, ahead, last = tee(it, 3)
for _ in ahead:
middle = compress(middle, zip(ahead, last))
return chain(
((True, False, first),),
zip_longest((), (), middle, fillvalue=False),
zip((False,), (True,), last)
)
return iter(((True, True, first),))
return iter(())
# Like "flag_pairs", but instead of appending a marker and
# always checking, append an iterator that flips 'last'.
def mark_ends__end(iterable):
first, last = True, False
def end():
nonlocal last
last = True
yield
for a, _ in pairwise(chain(iterable, end())):
yield first, last, a
first = False
def mark_ends__current(iterable):
it = iter(iterable)
try:
b = next(it)
except StopIteration:
return
try:
for i in count():
a = b
b = next(it)
yield i == 0, False, a
except StopIteration:
yield i == 0, True, a
funcs = [
mark_ends__current,
mark_ends__improved,
mark_ends__impr_mystyle,
mark_ends__shortest,
mark_ends__flag_pairs,
mark_ends__fastest_python,
mark_ends__enumerate,
mark_ends__marker_delay,
mark_ends__tristate,
mark_ends__tristate_2,
mark_ends__toolsish,
mark_ends__fastest_tools,
mark_ends__tools_replica,
mark_ends__end,
]
from timeit import timeit
from statistics import mean, stdev
from itertools import *
import sys
import random
_marker = object()
# Correctness
for n in range(10):
iterable = list(range(n))
expect = list(mark_ends__current(iterable))
for f in funcs:
result = list(f(iterable))
assert result == expect, (n, f)
# Speed
ns = sorted({
n
for i in range(41)
for n in [round(10**(i/10))]
})
N = max(ns)
def shuffled(xs):
return random.sample(xs, len(xs))
times = {f: {n: [] for n in ns} for f in funcs}
for i in range(5):
# print(i)
for n in shuffled(ns):
iterable = [None] * n
reps = round((N/n)**.75)
for f in shuffled(funcs):
t = timeit(
'for is_first, is_last, item in f(iterable): pass',
'from __main__ import f, iterable; gc.enable()',
number=reps
) / reps
times[f][n].append(t)
# fastest time for each (function,n) pair, in nanoseconds
times = {f.__name__.removeprefix('mark_ends__'): [round(min(times[f][n]) * 1e9, 1) for n in ns]
for f in funcs
}
# Times in percent of the current times
times = {
name: [round(t / td * 100, 2) for t, td in zip(times[name], times['current'])]
for name in times
}
# Plotting
import matplotlib.pyplot as plt
import matplotlib
colors = iter(matplotlib.cm.tab20(range(20)))
for name, y in times.items():
plt.semilogx(ns, y, label=name, color=next(colors))
plt.ylim(0, 130)
#plt.ylim(bottom=0)
# plt.title('sliding_window([None]*10000, n)', weight='bold')
plt.title('mark_ends', weight='bold')
plt.xlabel('len(iterable)', weight='bold')
plt.ylabel('time in % of current solution time', weight='bold')
#plt.legend(loc='upper right')
plt.legend(loc='lower center', ncol=3)
plt.savefig('mark_ends.png', dpi=200)
print(sys.version)Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels