Skip to content

Latest commit

 

History

History
661 lines (610 loc) · 25.1 KB

File metadata and controls

661 lines (610 loc) · 25.1 KB
 
Jun 14, 2016
Jun 14, 2016
1
#
Jun 14, 2016
Jun 14, 2016
2
# Licensed to the Apache Software Foundation (ASF) under one or more
3
# contributor license agreements. See the NOTICE file distributed with
4
# this work for additional information regarding copyright ownership.
5
# The ASF licenses this file to You under the Apache License, Version 2.0
6
# (the "License"); you may not use this file except in compliance with
7
# the License. You may obtain a copy of the License at
Jun 14, 2016
Jun 14, 2016
8
#
Jun 14, 2016
Jun 14, 2016
9
# http://www.apache.org/licenses/LICENSE-2.0
Jun 14, 2016
Jun 14, 2016
10
#
11
# Unless required by applicable law or agreed to in writing, software
12
# distributed under the License is distributed on an "AS IS" BASIS,
13
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
# See the License for the specific language governing permissions and
15
# limitations under the License.
Jun 14, 2016
Jun 14, 2016
16
#
Jun 14, 2016
Jun 14, 2016
17
Jul 23, 2016
Jul 23, 2016
18
"""Apache Beam SDK for Python setup file."""
Feb 8, 2024
Feb 8, 2024
19
import glob
Feb 22, 2024
Feb 22, 2024
20
import logging
Jun 14, 2016
Jun 14, 2016
21
import os
Feb 8, 2024
Feb 8, 2024
22
import shutil
Oct 12, 2023
Oct 12, 2023
23
import subprocess
Feb 5, 2019
Feb 5, 2019
24
import sys
Oct 14, 2016
Oct 14, 2016
25
import warnings
Aug 17, 2023
Aug 17, 2023
26
# Pylint and isort disagree here.
27
# pylint: disable=ungrouped-imports
28
from importlib.metadata import PackageNotFoundError
29
from importlib.metadata import distribution
Apr 7, 2022
Apr 7, 2022
30
from pathlib import Path
Oct 14, 2016
Oct 14, 2016
31
Apr 10, 2018
Apr 10, 2018
32
# pylint: disable=ungrouped-imports
Jun 14, 2016
Jun 14, 2016
33
import setuptools
Jun 5, 2025
Jun 5, 2025
34
from packaging.version import parse
Dec 11, 2019
Dec 11, 2019
35
from setuptools import Command
May 24, 2017
May 24, 2017
36
Jul 27, 2022
Jul 27, 2022
37
# pylint: disable=wrong-import-order
38
# It is recommended to import setuptools prior to importing distutils to avoid
39
# using legacy behavior from distutils.
40
# https://setuptools.readthedocs.io/en/latest/history.html#v48-0-0
Mar 28, 2023
Mar 28, 2023
41
from distutils.errors import DistutilsError # isort:skip
Jul 27, 2022
Jul 27, 2022
42
Jun 14, 2016
Jun 14, 2016
43
Jun 5, 2025
Jun 5, 2025
44
def to_filename(name: str) -> str:
  """Returns `name` with every '-' replaced by '_'."""
  return '_'.join(name.split('-'))
46
47
48
def normalize_path(filename):
  """Returns the normalized, symlink-resolved, case-normalized `filename`."""
  collapsed = os.path.normpath(filename)
  resolved = os.path.realpath(collapsed)
  return os.path.normcase(resolved)
50
51
Dec 11, 2019
Dec 11, 2019
52
class mypy(Command):
  """setuptools command that runs the `mypy` executable on the project."""
  user_options = []

  def initialize_options(self):
    """No options to initialize; the Command interface requires this hook."""

  def finalize_options(self):
    """No options to finalize; the Command interface requires this hook."""

  def get_project_path(self):
    """Builds the project in place and returns the directory to type-check."""
    self.run_command('egg_info')

    # Compile the extension modules next to their sources.
    self.reinitialize_command('build_ext', inplace=1)
    self.run_command('build_ext')

    egg_info = self.get_finalized_command("egg_info")
    base_dir = normalize_path(egg_info.egg_base)
    package_dir = to_filename(egg_info.egg_name)
    return os.path.join(base_dir, package_dir)

  def run(self):
    """Runs mypy and fails the command when it exits with a non-zero status."""
    status = subprocess.call(['mypy', self.get_project_path()])
    if status:
      raise DistutilsError("mypy exited with status %d" % status)
78
79
Jul 23, 2016
Jul 23, 2016
80
def get_version():
  """Returns the `__version__` string defined in apache_beam/version.py.

  The version file, located relative to this setup file, is executed with
  exec() into a fresh namespace and its `__version__` value is returned.

  Raises:
    OSError: if the version file cannot be opened.
    KeyError: if the version file does not define `__version__`.
  """
  version_path = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'apache_beam', 'version.py')
  global_names = {}
  # Use a context manager so the handle is closed deterministically, and read
  # as UTF-8 so the result does not depend on the locale's default encoding.
  with open(version_path, encoding='utf-8') as version_file:
    exec(version_file.read(), global_names)  # pylint: disable=exec-used
  return global_names['__version__']
90
Apr 10, 2018
Apr 10, 2018
91
Feb 24, 2017
Feb 24, 2017
92
# Package metadata passed to setuptools.setup() further down in this file.
PACKAGE_NAME = 'apache-beam'
PACKAGE_VERSION = get_version()
PACKAGE_DESCRIPTION = 'Apache Beam SDK for Python'
PACKAGE_URL = 'https://beam.apache.org'
PACKAGE_DOWNLOAD_URL = 'https://pypi.python.org/pypi/apache-beam'
PACKAGE_AUTHOR = 'Apache Software Foundation'
PACKAGE_EMAIL = 'dev@beam.apache.org'
PACKAGE_KEYWORDS = 'apache beam'

# Warn (but do not fail) when the installed pip is older than recommended.
RECOMMENDED_MIN_PIP_VERSION = '19.3.0'
try:
  _PIP_VERSION = distribution('pip').version
except PackageNotFoundError:
  # Do nothing if pip is not found. This can happen when using `Poetry` or
  # `pipenv` package managers.
  pass
else:
  if parse(_PIP_VERSION) < parse(RECOMMENDED_MIN_PIP_VERSION):
    warnings.warn(
        'You are using version {0} of pip. '
        'However, the recommended min version is {1}.'.format(
            _PIP_VERSION, RECOMMENDED_MIN_PIP_VERSION))

# Same check for Cython, which is optional.
REQUIRED_CYTHON_VERSION = '3.0.0'
try:
  _CYTHON_VERSION = distribution('cython').version
except PackageNotFoundError:
  # do nothing if Cython is not installed
  pass
else:
  if parse(_CYTHON_VERSION) < parse(REQUIRED_CYTHON_VERSION):
    warnings.warn(
        'You are using version {0} of cython. '
        'However, version {1} is recommended.'.format(
            _CYTHON_VERSION, REQUIRED_CYTHON_VERSION))
Jul 23, 2016
Jul 23, 2016
129
Jul 17, 2020
Jul 17, 2020
130
try:
  # pylint: disable=wrong-import-position
  from Cython.Build import cythonize as cythonize0
except ImportError:

  def cythonize(*args, **kwargs):
    """Fallback used when Cython cannot be imported: builds no extensions."""
    return []
else:

  def cythonize(*args, **kwargs):
    """Cythonizes the given sources and adds numpy's include dir to each."""
    import numpy
    built = cythonize0(*args, **kwargs)
    for extension in built:
      extension.include_dirs.append(numpy.get_include())
    return built
142
Sep 9, 2019
Sep 9, 2019
143
# [BEAM-8181] pyarrow cannot be installed on 32-bit Windows platforms.
if sys.platform == 'win32' and sys.maxsize <= 2**32:
  # Contribute no requirement at all. An empty list (rather than a list
  # holding an empty string) keeps a blank specifier out of install_requires.
  pyarrow_dependency = []
else:
  pyarrow_dependency = [
      'pyarrow>=6.0.1,<24.0.0',
      # NOTE(https://github.com/apache/beam/issues/29392): We can remove this
      # once Beam increases the pyarrow lower bound to a version that fixes CVE.
      # (lower bound >= 14.0.1)
      'pyarrow-hotfix<1'
  ]

# Exclude pandas<=1.4.2 since it doesn't work with numpy 1.24.x.
# Exclude 1.5.0 and 1.5.1 because of
# https://github.com/pandas-dev/pandas/issues/45725
# must update the below "docs" and "test" for extras_require
dataframe_dependency = [
    'pandas>=1.4.3,!=1.5.0,!=1.5.1,<2.3',
]

milvus_dependency = ['pymilvus>=2.5.10,<3.0.0']

# Shared by the *ml_test extras declared in the setup() call.
ml_base = [
    'embeddings>=0.0.4',  # 0.0.3 crashes setuptools
    'onnxruntime',
    'langchain',
    'sentence-transformers>=2.2.2',
    'skl2onnx',
    'pyod>=0.7.6',  # 0.7.5 crashes setuptools
    'tensorflow',
    # tensorflow transitive dep, lower versions not compatible with Python3.10+
    'absl-py>=0.12.0',
    'tensorflow-hub',
    'tf2onnx',
    'torch',
    'transformers',
]
180
Feb 8, 2024
Feb 8, 2024
181
Oct 17, 2023
Oct 17, 2023
182
def find_by_ext(root_dir, ext):
  """Yields the real path of every file under `root_dir` ending with `ext`."""
  for dirpath, _, filenames in os.walk(root_dir):
    yield from (
        os.path.realpath(os.path.join(dirpath, filename))
        for filename in filenames
        if filename.endswith(ext))
Mar 28, 2023
Mar 28, 2023
187
Feb 8, 2024
Feb 8, 2024
188
May 24, 2017
May 24, 2017
189
# We must generate protos after setup_requires are installed.
Apr 7, 2022
Apr 7, 2022
190
def generate_protos_first():
  """Generates the protobuf modules by running gen_protos.py in a subprocess.

  If gen_protos.py is not present next to this file, generation is skipped as
  long as `*_pb2.py` files already exist under apache_beam/portability/api.

  Raises:
    RuntimeError: if gen_protos.py is absent and no `*_pb2.py` files exist,
      or if the gen_protos.py subprocess exits with a non-zero status.
  """
  # Pyproject toml build happens in isolated environments. In those envs,
  # gen_protos is unable to get imported. so we run a subprocess call.
  cwd = os.path.abspath(os.path.dirname(__file__))
  gen_protos_script = os.path.join(cwd, 'gen_protos.py')

  # when pip install <>.tar.gz gets called, if gen_protos.py is not available
  # in the sdist,then the proto files would have already been generated. So we
  # skip proto generation in that case.
  if not os.path.exists(gen_protos_script):
    # make sure we already generated protos
    pb2_files = list(
        find_by_ext(
            os.path.join(cwd, 'apache_beam', 'portability', 'api'),
            '_pb2.py'))
    if not pb2_files:
      raise RuntimeError(
          'protobuf files are not generated. '
          'Please generate pb2 files')

    warnings.warn('Skipping proto generation as they are already generated.')
    return

  try:
    # Capture the output as text (not bytes) so that it reads naturally both
    # when printed and when embedded in the error message below.
    out = subprocess.run(
        [sys.executable, gen_protos_script, '--no-force'],
        capture_output=True,
        text=True,
        errors='replace',
        check=True)
  except subprocess.CalledProcessError as err:
    # Interpolate stderr into the message; passing it as a second argument
    # would leave a literal '%s' in the exception text.
    raise RuntimeError(
        'Could not generate protos due to error: %s' % err.stderr) from err
  print(out.stdout)
218
219
220
def copy_tests_from_docs():
  """Copies the `yaml*.md` website pages into apache_beam/yaml/docs.

  The source directory is resolved relative to this file
  (../../website/www/site/content/en/documentation/sdks). When it does not
  exist, a warning is issued and nothing is copied. Otherwise the destination
  directory is recreated before copying.
  """
  python_root = os.path.abspath(os.path.dirname(__file__))
  docs_src = os.path.normpath(
      os.path.join(
          python_root, '../../website/www/site/content/en/documentation/sdks'))
  docs_dest = os.path.normpath(
      os.path.join(python_root, 'apache_beam/yaml/docs'))

  if not os.path.exists(docs_src):
    warnings.warn(
        f'Could not locate yaml docs source directory {docs_src}. '
        'Skipping copying tests from docs.')
    return

  shutil.rmtree(docs_dest, ignore_errors=True)
  # rmtree above swallows errors, so the directory may still be present;
  # exist_ok avoids a FileExistsError in that case.
  os.makedirs(docs_dest, exist_ok=True)
  for path in glob.glob(os.path.join(docs_src, 'yaml*.md')):
    shutil.copy(path, docs_dest)
Apr 7, 2022
Apr 7, 2022
236
237
Feb 22, 2024
Feb 22, 2024
238
def generate_external_transform_wrappers():
  """Generates the external transform wrappers via gen_xlang_wrappers.py.

  The script is looked up next to this file and the
  `standard_external_transforms.yaml` config in the parent directory. When
  either is missing, generation is skipped provided that
  apache_beam/transforms/xlang already contains more than one entry.

  Raises:
    RuntimeError: if the script or config is missing and the wrappers have not
      been generated, or if the generation subprocess exits with a non-zero
      status.
  """
  sdk_dir = os.path.abspath(os.path.dirname(__file__))
  script_path = os.path.join(sdk_dir, 'gen_xlang_wrappers.py')
  config_path = os.path.join(
      os.path.dirname(sdk_dir), 'standard_external_transforms.yaml')
  script_exists = os.path.exists(script_path)
  config_exists = os.path.exists(config_path)

  # we need both the script and the standard transforms config file.
  # at build time, we don't have access to apache_beam to discover and
  # retrieve external transforms, so the config file has to already exist
  if not script_exists or not config_exists:
    generated_transforms_dir = os.path.join(
        sdk_dir, 'apache_beam', 'transforms', 'xlang')

    # if exists, this directory will have at least its __init__.py file
    if (not os.path.exists(generated_transforms_dir) or
        len(os.listdir(generated_transforms_dir)) <= 1):
      # Collect the missing pieces first so that they are always joined with
      # proper spacing, including when both are missing.
      missing = []
      if not script_exists:
        missing.append('the generation script `gen_xlang_wrappers.py`')
      if not config_exists:
        missing.append('the standard external transforms config')
      raise RuntimeError(
          'External transform wrappers have not been generated and ' +
          ' and '.join(missing) + ' could not be found')

    logging.info(
        'Skipping external transform wrapper generation as they '
        'are already generated.')
    return

  try:
    subprocess.run([
        sys.executable,
        script_path,
        '--cleanup',
        '--transforms-config-source',
        config_path,
    ],
                   capture_output=True,
                   text=True,
                   errors='replace',
                   check=True)
  except subprocess.CalledProcessError as err:
    # Interpolate stderr into the message; passing it as a second argument
    # would leave a literal '%s' in the exception text.
    raise RuntimeError(
        'Could not generate external transform wrappers due to '
        'error: %s' % err.stderr) from err
Feb 22, 2024
Feb 22, 2024
283
284
Apr 7, 2022
Apr 7, 2022
285
def get_portability_package_data():
  """Lists the .pyi and .yaml files under apache_beam/portability/api.

  Returns:
    Paths, as strings relative to the apache_beam package directory, of all
    `*.pyi` files followed by all `*.yaml` files found recursively.
  """
  package_root = Path(__file__).parent / 'apache_beam'
  api_dir = package_root / 'portability' / 'api'
  return [
      str(match.relative_to(package_root))
      for pattern in ('*.pyi', '*.yaml')
      for match in api_dir.rglob(pattern)
  ]
May 24, 2017
May 24, 2017
295
Apr 25, 2018
Apr 25, 2018
296
Nov 6, 2025
Nov 6, 2025
297
# Version specifier passed as python_requires to setuptools.setup().
python_requires = '>=3.10'

# Warn on Python 3 minor versions from 3.14 onwards.
_py_major, _py_minor = sys.version_info.major, sys.version_info.minor
if _py_major == 3 and _py_minor >= 14:
  warnings.warn(
      'This version of Apache Beam has not been sufficiently tested on '
      'Python %s.%s. You may encounter bugs or missing features.' %
      (_py_major, _py_minor))
Jul 1, 2020
Jul 1, 2020
304
Oct 4, 2021
Oct 4, 2021
305
# Script entry point: generate the required sources, then call setup().
if __name__ == '__main__':
  # In order to find the tree of proto packages, the directory
  # structure must exist before the call to setuptools.find_packages()
  # executes below.
  generate_protos_first()

  generate_external_transform_wrappers()

  # These data files live elsewhere in the full Beam repository.
  copy_tests_from_docs()

  # generate cythonize extensions only if we are building a wheel or
  # building an extension or running in editable mode.
  cythonize_cmds = ('bdist_wheel', 'build_ext', 'editable_wheel')
  if any(cmd in sys.argv for cmd in cythonize_cmds):
    extensions = cythonize([
        'apache_beam/**/*.pyx',
        'apache_beam/coders/coder_impl.py',
        'apache_beam/metrics/cells.py',
        'apache_beam/metrics/execution.py',
        'apache_beam/runners/common.py',
        'apache_beam/runners/worker/logger.py',
        'apache_beam/runners/worker/opcounters.py',
        'apache_beam/runners/worker/operations.py',
        'apache_beam/transforms/cy_combiners.py',
        'apache_beam/transforms/stats.py',
        'apache_beam/utils/counters.py',
        'apache_beam/utils/windowed_value.py',
    ])
  else:
    extensions = []

  # Use README.md as the long description, with a built-in summary as the
  # fallback when the file is not present.
  try:
    long_description = ((Path(__file__).parent /
                         "README.md").read_text(encoding='utf-8'))
  except FileNotFoundError:
    long_description = (
        'Apache Beam is a unified programming model for both batch and '
        'streaming data processing, enabling efficient execution across '
        'diverse distributed execution engines and providing extensibility '
        'points for connecting to different technologies and user '
        'communities.')

  # Keep all dependencies inlined in the setup call, otherwise Dependabot won't
  # be able to parse it.
  setuptools.setup(
      name=PACKAGE_NAME,
      version=PACKAGE_VERSION,
      description=PACKAGE_DESCRIPTION,
      long_description=long_description,
      long_description_content_type='text/markdown',
      url=PACKAGE_URL,
      download_url=PACKAGE_DOWNLOAD_URL,
      author=PACKAGE_AUTHOR,
      author_email=PACKAGE_EMAIL,
      packages=setuptools.find_packages(),
      package_data={
          'apache_beam': [
              '*/*.pyx',
              '*/*/*.pyx',
              '*/*.pxd',
              '*/*/*.pxd',
              '*/*.h',
              '*/*/*.h',
              'testing/data/*.yaml',
              'yaml/*.yaml',
              'yaml/docs/*.md',
              *get_portability_package_data()
          ]
      },
      ext_modules=extensions,
      install_requires=[
          'cryptography>=39.0.0,<48.0.0',
          'envoy-data-plane>=1.0.3,<2; python_version >= "3.11"',
          # Newer version only work on Python 3.11. Versions 0.3 <= ver < 1.x
          # conflict with other GCP dependencies.
          'envoy-data-plane<0.3.0; python_version < "3.11"',
          'fastavro>=0.23.6,<2',
          'fasteners>=0.3,<1.0',
          'grpcio>=1.33.1,<2,!=1.48.0,!=1.59.*,!=1.60.*,!=1.61.*,!=1.62.0,!=1.62.1,!=1.66.*,!=1.67.*,!=1.68.*,!=1.69.*,!=1.70.*',  # pylint: disable=line-too-long
          'httplib2>=0.8,<0.32.0',
          'jsonpickle>=3.0.0,<4.0.0',
          # numpy can have breaking changes in minor versions.
          # Use a strict upper bound.
          'numpy>=1.14.3,<2.5.0',  # Update pyproject.toml as well.
          'objsize>=0.6.1,<0.8.0',
          'packaging>=22.0',
          'pillow>=12.1.1,<13',
          'pymongo>=3.8.0,<5.0.0',
          'proto-plus>=1.7.1,<2',
          # 1. Use a tighter upper bound in protobuf dependency to make sure
          # the minor version at job submission
          # does not exceed the minor version at runtime.
          # To avoid depending on an old dependency, update the minor version on
          # every Beam release, see: https://github.com/apache/beam/issues/25590

          # 2. Allow latest protobuf 3 version as a courtesy to some customers.
          #
          # 3. Exclude protobuf 4 versions that leak memory, see:
          # https://github.com/apache/beam/issues/28246
          'protobuf>=3.20.3,<7.0.0.dev0,!=4.0.*,!=4.21.*,!=4.22.0,!=4.23.*,!=4.24.*',  # pylint: disable=line-too-long
          'python-dateutil>=2.8.0,<3',
          'pytz>=2018.3',
          'requests>=2.32.4,<3.0.0',
          'sortedcontainers>=2.4.0',
          'typing-extensions>=3.7.0',
          'zstandard>=0.18.0,<1',
          'pyyaml>=3.12,<7.0.0',
          'beartype>=0.21.0,<0.23.0',
          # Dynamic dependencies must be specified in a separate list, otherwise
          # Dependabot won't be able to parse the main list. Any dynamic
          # dependencies will not receive updates from Dependabot.
      ] + pyarrow_dependency,
      python_requires=python_requires,
      # BEAM-8840: Do NOT use tests_require or setup_requires.
      extras_require={
          'dill': [
              # Dill doesn't have forwards-compatibility guarantees within minor
              # version. Pickles created with a new version of dill may not
              # unpickle using older version of dill. It is best to use the same
              # version of dill on client and server, therefore list of allowed
              # versions is very narrow.
              # See: https://github.com/uqfoundation/dill/issues/341.
              'dill>=0.3.1.1,<0.3.2',
          ],
          'docs': [
              'jinja2>=3.0,<3.2',
              'Sphinx>=7.0.0,<8.0',
              'docstring-parser>=0.15,<1.0',
              'docutils>=0.18.1',
              'markdown',
              'pandas<2.3.0',
              'openai',
              'virtualenv-clone>=0.5,<1.0',
          ],
          'test': [
              'cloud-sql-python-connector[pg8000]>=1.0.0,<2.0.0',
              'docstring-parser>=0.15,<1.0',
              'freezegun>=0.3.12',
              'jinja2>=3.0,<3.2',
              'joblib>=1.0.1',
              'mock>=1.0.1,<6.0.0',
              'pandas<2.3.0',
              'parameterized>=0.7.1,<0.10.0',
              'pyhamcrest>=1.9,!=1.10.0,<3.0.0',
              'requests_mock>=1.7,<2.0',
              'tenacity>=8.0.0,<9',
              'pytest>=7.1.2,<9.0',
              'pytest-xdist>=2.5.0,<4',
              'pytest-timeout>=2.1.0,<3',
              'scikit-learn>=0.20.0,<1.8.0',
              'sqlalchemy>=1.3,<3.0',
              'psycopg2-binary>=2.8.5,<3.0',
              'testcontainers[mysql,kafka,milvus]>=4.0.0,<5.0.0',
              'cryptography>=41.0.2',
              # TODO(https://github.com/apache/beam/issues/36951): need to
              # further investigate the cause
              'hypothesis>5.0.0,<6.148.4',
              'virtualenv-clone>=0.5,<1.0',
              'python-tds>=1.16.1',
              'sqlalchemy-pytds>=1.0.2',
              'pg8000>=1.31.5',
              "PyMySQL>=1.1.0",
              'oracledb>=3.1.1'
          ],
          'gcp': [
              'cachetools>=3.1.0,<7',
              'google-api-core>=2.0.0,<3',
              'google-apitools>=0.5.31,<0.5.32; python_version < "3.13"',
              'google-apitools>=0.5.35; python_version >= "3.13"',
              # NOTE: Maintainers, please do not require google-auth>=2.x.x
              # Until this issue is closed
              # https://github.com/googleapis/google-cloud-python/issues/10566
              'google-auth>=1.18.0,<3',
              'google-auth-httplib2>=0.1.0,<0.3.0',
              'google-cloud-datastore>=2.0.0,<3',
              'google-cloud-pubsub>=2.1.0,<3',
              'google-cloud-storage>=2.18.2,<3',
              # GCP packages required by tests
              'google-cloud-bigquery>=2.0.0,<4',
              'google-cloud-bigquery-storage>=2.6.3,<3',
              'google-cloud-core>=2.0.0,<3',
              'google-cloud-bigtable>=2.19.0,<3',
              'google-cloud-build>=3.35.0,<4',
              'google-cloud-spanner>=3.0.0,<4',
              # GCP Packages required by ML functionality
              'google-cloud-dlp>=3.0.0,<4',
              'google-cloud-kms>=3.0.0,<4',
              'google-cloud-language>=2.0,<3',
              'google-cloud-secret-manager>=2.0,<3',
              'google-cloud-videointelligence>=2.0,<3',
              'google-cloud-vision>=2,<4',
              'google-cloud-recommendations-ai>=0.1.0,<0.11.0',
              'google-cloud-aiplatform>=1.26.0, < 2.0',
              'cloud-sql-python-connector>=1.18.2,<2.0.0',
              'python-tds>=1.16.1',
              'pg8000>=1.31.5',
              "PyMySQL>=1.1.0",
              # Authentication for Google Artifact Registry when using
              # --extra-index-url or --index-url in requirements.txt in
              # Dataflow, which allows installing python packages from private
              # Python repositories in GAR.
              'keyrings.google-artifactregistry-auth',
              'orjson>=3.9.7,<4',
              'regex>=2020.6.8',
          ],
          'interactive': [
              'facets-overview>=1.1.0,<2',
              'google-cloud-dataproc>=5.0.0,<6',
              'ipython>=7,<9',
              'ipykernel>=6,<7',
              'ipywidgets>=8,<9',
              # Skip version 6.1.13 due to
              # https://github.com/jupyter/jupyter_client/issues/637
              'jupyter-client>=6.1.11,!=6.1.13,<8.2.1',
              'pydot>=1.2.0,<2',
              'timeloop>=1.0.2,<2',
              'nbformat>=5.0.5,<6',
              'nbconvert>=6.2.0,<8',
          ] + dataframe_dependency,
          'interactive_test': [
              # headless chrome based integration tests
              'needle>=0.5.0,<1',
              'chromedriver-binary>=117,<118',
              # Must stay satisfiable together with the pillow requirement in
              # install_requires above; a disjoint range (previously
              # '>=7.1.1,<10') makes this extra impossible to resolve.
              'pillow>=12.1.1,<13',
              # urllib 2.x is a breaking change for the headless chrome tests
              'urllib3<2,>=1.21.1'
          ],
          # Optional dependencies to unit-test ML functionality.
          # We don't expect users to install this extra. Users should install
          # necessary dependencies individually, or we should create targeted
          # extras. Keeping the bounds open as much as possible so that we
          # can find out early when Beam doesn't work with new versions.
          'ml_test': [
              'datatable',
              # tensorflow-transform requires dill, but doesn't set dill as a
              # hard requirement in setup.py.
              'dill',
              # match tft extra.
              'tensorflow_transform>=1.14.0,<1.15.0',
              # TFT->TFX-BSL require pandas 1.x, which is not compatible
              # with numpy 2.x
              'numpy<2',
              # To help with dependency resolution in test suite. Revise once
              # https://github.com/apache/beam/issues/37854 is fixed
              'protobuf<4; python_version<"3.11"',
              # Comment out xgboost as it is breaking presubmit python ml
              # tests due to tag check introduced since pip 24.2
              # https://github.com/apache/beam/issues/31285
              # 'xgboost<2.0',  # https://github.com/apache/beam/issues/31252
          ] + ml_base,
          'p310_ml_test': [
              'datatable',
          ] + ml_base,
          'p312_ml_test': [
              'datatable',
          ] + ml_base,
          # maintainer: milvus tests only run with this extension. Make sure it
          # is covered by docker-in-docker test when changing py version
          'p313_ml_test': ml_base + milvus_dependency,
          'aws': ['boto3>=1.9,<2'],
          'azure': [
              'azure-storage-blob>=12.3.2,<13',
              'azure-core>=1.7.0,<2',
              'azure-identity>=1.12.0,<2',
          ],
          'dataframe': dataframe_dependency,
          'dask': [
              'distributed >= 2024.4.2',
              'dask >= 2024.4.2',
              # For development, 'distributed >= 2023.12.1' should work with
              # the above dask PR, however it can't be installed as part of
              # a single `pip` call, since distributed releases are pinned to
              # specific dask releases. As a workaround, distributed can be
              # installed first, and then `.[dask]` installed second, with the
              # `--update` / `-U` flag to replace the dask release brought in
              # by distributed.
          ],
          'hadoop': ['hdfs>=2.1.0,<3.0.0'],
          'yaml': [
              'docstring-parser>=0.15,<1.0',
              'jinja2>=3.0,<3.2',
              'virtualenv-clone>=0.5,<1.0',
              # https://github.com/PiotrDabkowski/Js2Py/issues/317
              'js2py>=0.74,<1; python_version<"3.12"',
              'jsonschema>=4.0.0,<5.0.0',
          ] + dataframe_dependency,
          # Keep the following dependencies in line with what we test against
          # in https://github.com/apache/beam/blob/master/sdks/python/tox.ini
          # For more info, see
          # https://docs.google.com/document/d/1c84Gc-cZRCfrU8f7kWGsNR2o8oSRjCM-dGHO9KvPWPw/edit?usp=sharing
          'torch': ['torch>=1.9.0,<2.8.0'],
          'tensorflow': [
              'tensorflow>=2.12rc1,<2.21',
              # tensorflow transitive dep
              'absl-py>=0.12.0'
          ],
          'transformers': [
              'transformers>=4.28.0,<4.56.0',
              'tensorflow>=2.12.0',
              'torch>=1.9.0'
          ],
          'ml_cpu': [
              'tensorflow>=2.12.0',
              'torch==2.8.0+cpu',
              'transformers>=4.28.0,<4.56.0',
              # tensorflow transitive dep
              'absl-py>=0.12.0'
          ],
          'redis': ['redis>=5.0.0,<6'],
          'tft': [
              'tensorflow_transform>=1.14.0,<1.15.0',
              # TFT->TFX-BSL require pandas 1.x, which is not compatible
              # with numpy 2.x
              'numpy<2',
              # tensorflow-transform requires dill, but doesn't set dill as a
              # hard requirement in setup.py.
              'dill'
          ],
          'tfrecord': ['crcmod>=1.7,<2.0'],
          'onnx': [
              'onnxruntime==1.13.1',
              'torch==1.13.1',
              'tensorflow==2.11.0',
              'tf2onnx==1.13.0',
              'skl2onnx==1.13',
              'transformers==4.25.1',
              # tensorflow transitive dep
              'absl-py>=0.12.0'
          ],
          'xgboost': ['xgboost>=1.6.0,<2.1.3', 'datatable==1.0.0'],
          'tensorflow-hub': ['tensorflow-hub>=0.14.0,<0.16.0'],
          'milvus': milvus_dependency,
          'vllm': ['openai==1.107.1', 'vllm==0.10.1.1', 'triton==3.3.1']
      },
      zip_safe=False,
      # PyPI package information.
      classifiers=[
          'Intended Audience :: End Users/Desktop',
          'License :: OSI Approved :: Apache Software License',
          'Operating System :: POSIX :: Linux',
          'Programming Language :: Python :: 3.10',
          'Programming Language :: Python :: 3.11',
          'Programming Language :: Python :: 3.12',
          'Programming Language :: Python :: 3.13',
          # When updating version classifiers, also update version warnings
          # above and in apache_beam/__init__.py.
          'Topic :: Software Development :: Libraries',
          'Topic :: Software Development :: Libraries :: Python Modules',
      ],
      license='Apache License, Version 2.0',
      keywords=PACKAGE_KEYWORDS,
      cmdclass={
          'mypy': mypy,
      },
  )