Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions camelot/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ def __init__(
debug=False,
):
self.debug = debug
self.is_temp_file = is_url(filepath)
if is_url(filepath):
filepath = download_url(str(filepath))
self.filepath: StrByteType | Path | str = filepath
self.filepath = download_url(str(filepath))
else:
self.filepath: StrByteType | Path | str = filepath

if isinstance(filepath, str) and not filepath.lower().endswith(".pdf"):
raise NotImplementedError("File format not supported")
Expand All @@ -77,6 +79,41 @@ def __init__(
self.password = password
self.pages = self._get_pages(pages)

def __enter__(self):
    """Start a ``with`` block bound to this handler.

    No resource is acquired here; the handler is simply handed back so
    it can be bound by the ``as`` clause.

    Returns
    -------
    PDFHandler
        This same instance.
    """
    return self

def __exit__(self, exc_type, exc_val, exc_tb):
    """Exit the context manager and clean up temporary files.

    Cleanup is delegated to ``close()`` so that leaving a ``with`` block
    and calling ``close()`` explicitly go through one code path.

    Parameters
    ----------
    exc_type : type or None
        Type of the exception raised in the context, if any.
    exc_val : Exception or None
        The exception instance raised, if any.
    exc_tb : traceback or None
        The traceback of the exception, if any.

    Returns
    -------
    None
        A falsy result, so an exception raised inside the ``with`` body
        is never suppressed.
    """
    # The exception details are intentionally unused: cleanup happens
    # whether or not the body raised.
    self.close()

def close(self):
    """Close the handler and clean up temporary files.

    Deletes the file at ``self.filepath`` when ``self.is_temp_file`` is
    true; otherwise does nothing. Calling this more than once is safe.
    """
    if not self.is_temp_file:
        return
    try:
        os.remove(self.filepath)  # type: ignore
    except FileNotFoundError:
        # Already gone (earlier close() call or removed externally).
        # Attempting the delete directly avoids the race between an
        # existence check and the removal.
        pass

def _get_pages(self, pages):
"""Convert pages string to list of integers.

Expand Down
18 changes: 9 additions & 9 deletions camelot/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,13 @@ def read_pdf(
warnings.simplefilter("ignore")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages=pages, password=password, debug=debug)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(
flavor=flavor,
suppress_stdout=suppress_stdout,
parallel=parallel,
layout_kwargs=layout_kwargs,
**kwargs,
)
with PDFHandler(filepath, pages=pages, password=password, debug=debug) as p:
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(
flavor=flavor,
suppress_stdout=suppress_stdout,
parallel=parallel,
layout_kwargs=layout_kwargs,
**kwargs,
)
return tables
Loading