-
-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Closed
Description
Howdy 👋 I set up an ArchiveBox instance on a public-facing VPS with docker-compose (using just ArchiveBox & Sonic). Whenever I submit a YouTube video, it gets stuck in a "not yet archived" state, and the media archive method does not appear to even kick off. I dug into the container's logs a bit, and there appears to be an issue running the media.py extractor:
archivebox_1 | Internal Server Error: /add/
archivebox_1 | Traceback (most recent call last):
archivebox_1 | File "/app/archivebox/extractors/__init__.py", line 109, in archive_link
archivebox_1 | result = method_function(link=link, out_dir=out_dir)
archivebox_1 | File "/app/archivebox/util.py", line 114, in typechecked_function
archivebox_1 | return func(*args, **kwargs)
archivebox_1 | File "/app/archivebox/extractors/media.py", line 74, in save_media
archivebox_1 | index_texts = [
archivebox_1 | File "/app/archivebox/extractors/media.py", line 75, in <listcomp>
archivebox_1 | text_file.read_text(encoding='utf-8').strip()
archivebox_1 | File "/usr/local/lib/python3.9/pathlib.py", line 1257, in read_text
archivebox_1 | return f.read()
archivebox_1 | File "/usr/local/lib/python3.9/codecs.py", line 322, in decode
archivebox_1 | (result, consumed) = self._buffer_decode(data, self.errors, final)
archivebox_1 | UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 414699: invalid continuation byte
archivebox_1 | The above exception was the direct cause of the following exception:
archivebox_1 | Traceback (most recent call last):
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/core/handlers/exception.py", line 47, in inner
archivebox_1 | response = get_response(request)
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/core/handlers/base.py", line 181, in _get_response
archivebox_1 | response = wrapped_callback(request, *callback_args, **callback_kwargs)
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/views/generic/base.py", line 70, in view
archivebox_1 | return self.dispatch(request, *args, **kwargs)
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/contrib/auth/mixins.py", line 109, in dispatch
archivebox_1 | return super().dispatch(request, *args, **kwargs)
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/views/generic/base.py", line 98, in dispatch
archivebox_1 | return handler(request, *args, **kwargs)
archivebox_1 | File "/usr/local/lib/python3.9/site-packages/django/views/generic/edit.py", line 142, in post
archivebox_1 | return self.form_valid(form)
archivebox_1 | File "/app/archivebox/core/views.py", line 286, in form_valid
archivebox_1 | add(**input_kwargs)
archivebox_1 | File "/app/archivebox/util.py", line 114, in typechecked_function
archivebox_1 | return func(*args, **kwargs)
archivebox_1 | File "/app/archivebox/main.py", line 624, in add
archivebox_1 | archive_links(new_links, overwrite=False, **archive_kwargs)
archivebox_1 | File "/app/archivebox/util.py", line 114, in typechecked_function
archivebox_1 | return func(*args, **kwargs)
archivebox_1 | File "/app/archivebox/extractors/__init__.py", line 181, in archive_links
archivebox_1 | archive_link(to_archive, overwrite=overwrite, methods=methods, out_dir=Path(link.link_dir))
archivebox_1 | File "/app/archivebox/util.py", line 114, in typechecked_function
archivebox_1 | return func(*args, **kwargs)
archivebox_1 | File "/app/archivebox/extractors/__init__.py", line 130, in archive_link
archivebox_1 | raise Exception('Exception in archive_methods.save_{}(Link(url={}))'.format(
archivebox_1 | Exception: Exception in archive_methods.save_media(Link(url=https://www.youtube.com/watch?v=r02eaOHenE0))
Any thoughts on how I can clean this up to allow a successful archive?
Metadata
Metadata
Assignees
Labels
No labels