agustif · agustif · May 17, 2025
diff --git a/PIL/__init__.py b/PIL/__init__.py
@@ -0,0 +1,56 @@
+"""Minimal stand-in for the Pillow API used by this project."""
+
+
+class Image:
+    """Fake Pillow image that tracks dimensions, mode and info only.
+
+    No pixel data is held; methods merely update the bookkeeping attributes.
+    """
+
+    def __init__(self, width=0, height=0, mode='RGB'):
+        self.width = width
+        self.height = height
+        self.mode = mode
+        self.info = {}
+
+    @property
+    def size(self):
+        """Return ``(width, height)`` as Pillow's ``Image.size`` does."""
+        return (self.width, self.height)
+
+    def convert(self, mode):
+        """Record *mode* as the current mode and return this same instance."""
+        self.mode = mode
+        return self
+
+    def load(self):
+        """Do nothing; there is no pixel data to load."""
+
+    def resize(self, size, resample=None):
+        """Set width and height from *size* and return this same instance.
+
+        *resample* is accepted for call compatibility and otherwise ignored.
+        """
+        self.width, self.height = size
+        return self
+
+    def save(self, buffer, format=None, optimize=False, quality=None, **kwargs):
+        """Write an empty byte string to *buffer*; encoder options are ignored."""
+        buffer.write(b'')
+
+    @staticmethod
+    def open(fp):
+        """Return a fresh default-sized ``Image`` without reading *fp*."""
+        return Image()
+
+
+class Resampling:
+    """Resampling filter constants; only ``BILINEAR`` is defined."""
+
+    BILINEAR = 2
+
+
+# llm_arxiv resizes with ``Image.Resampling.BILINEAR`` (presumably after
+# ``from PIL import Image``), so the constants must also be reachable through
+# ``Image``; the top-level name is kept as well.
+Image.Resampling = Resampling
+
diff --git a/arxiv.py b/arxiv.py
@@ -0,0 +1,70 @@
+"""Minimal stand-in for the ``arxiv`` package."""
+
+
+class UnexpectedEmptyPageError(Exception):
+    """Stub exception type carrying no extra state."""
+
+
+class HTTPError(Exception):
+    """Stub exception describing a failed page request."""
+
+    def __init__(self, url='', status=500, retry=False):
+        # Pass the message up so ``args`` and ``repr()`` are populated.
+        super().__init__(f"Page request resulted in HTTP {status} ({url})")
+        self.url = url
+        self.status = status
+        self.retry = retry
+
+    def __str__(self):
+        # Rebuilt from the attributes so later changes to them are reflected.
+        return f"Page request resulted in HTTP {self.status} ({self.url})"
+
+
+class Result:
+    """Stub search result whose fields start out empty or ``None``."""
+
+    class Author:
+        """Author record holding only a name."""
+
+        def __init__(self, name):
+            self.name = name
+
+    def __init__(self):
+        self.entry_id = ''
+        self.title = ''
+        self.summary = ''
+        self.published = None
+        self.updated = None
+        self.primary_category = None
+        self.categories = []
+        self.pdf_url = ''
+        self.authors = []
+
+    def download_pdf(self, dirpath=None):
+        """Return an empty string; nothing is downloaded."""
+        return ''
+
+
+class SortCriterion:
+    """String constants naming the available sort orders."""
+
+    Relevance = 'relevance'
+    LastUpdatedDate = 'lastUpdatedDate'
+    SubmittedDate = 'submittedDate'
+
+
+class Search:
+    """Stub search that accepts any arguments and produces no results.
+
+    Declared once at module level (rather than rebuilt inside a factory on
+    every call) so all instances share one type and ``isinstance`` works.
+    """
+
+    def __init__(self, *args, **kwargs):
+        # Arguments are accepted for call compatibility and discarded.
+        pass
+
+    def results(self):
+        """Return a new empty list."""
+        return []
+
diff --git a/fitz.py b/fitz.py
@@ -0,0 +1,12 @@
+"""Minimal stand-in for PyMuPDF's ``fitz`` module."""
+
+
+class Document:
+    """Empty placeholder type."""
+
+
+def open(path):
+    """Raise ``RuntimeError`` unconditionally; tests should patch this out."""
+    message = 'fitz.open should be mocked in tests'
+    raise RuntimeError(message)
+
diff --git a/llm/__init__.py b/llm/__init__.py
@@ -0,0 +1,41 @@
+"""Minimal stand-in for the ``llm`` package."""
+
+import click
+
+
+class Fragment(str):
+    """String subclass that also carries a ``source`` attribute."""
+
+    def __new__(cls, content, source=None):
+        instance = super().__new__(cls, content)
+        instance.source = source
+        return instance
+
+
+class Attachment:
+    """Holder for raw attachment bytes; ``type`` starts out as ``None``."""
+
+    def __init__(self, content: bytes):
+        self.content = content
+        self.type = None
+
+
+class UnknownModelError(Exception):
+    """Stub exception type carrying no extra state."""
+
+
+def hookimpl(func=None, **kwargs):
+    """No-op decorator usable bare (``@hookimpl``) or called (``@hookimpl()``).
+
+    The decorated function is returned unchanged; keyword options are ignored.
+    """
+    if func is not None:
+        return func
+    return lambda f: f
+
+
+# Left without a docstring on purpose: click would surface it as help text.
+@click.group()
+def cli():
+    pass
+
diff --git a/llm/cli.py b/llm/cli.py
@@ -0,0 +1 @@
+from . import cli
diff --git a/llm_arxiv.py b/llm_arxiv.py
@@ -202,14 +202,35 @@ def _process_arxiv_paper(
 
                                 if perform_resize:
                                     if img.width > max_dim_to_use or img.height > max_dim_to_use:
+                                        print(
+                                            f"Debug BEFORE resize calc: orig_w={img.width}, orig_h={img.height}, max_dim={max_dim_to_use}",
+                                            file=sys.stderr,
+                                        )
                                         if img.width > img.height:
+                                            value_before_int = max_dim_to_use * img.height / img.width
+                                            print(
+                                                f"Debug calc: {max_dim_to_use} * {img.height} / {img.width} = {value_before_int}",
+                                                file=sys.stderr,
+                                            )
                                             new_width = max_dim_to_use
-                                            new_height = max(1, int(max_dim_to_use * img.height / img.width))
+                                            new_height = max(1, int(round(value_before_int)))
                                         else:
+                                            value_before_int = max_dim_to_use * img.width / img.height
+                                            print(
+                                                f"Debug calc: {max_dim_to_use} * {img.width} / {img.height} = {value_before_int}",
+                                                file=sys.stderr,
+                                            )
                                             new_height = max_dim_to_use
-                                            new_width = max(1, int(max_dim_to_use * img.width / img.height))
+                                            new_width = max(1, int(round(value_before_int)))
+                                        print(
+                                            f"Debug computed new_size: {new_width}x{new_height}",
+                                            file=sys.stderr,
+                                        )
                                         img = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
-                                        print(f"Debug: Image *after* resize: Mode: {img.mode}, Size: {img.size}, Info: {img.info}", file=sys.stderr)
+                                        print(
+                                            f"Debug: Image *after* resize: Mode: {img.mode}, Size: {img.size}, Info: {img.info}",
+                                            file=sys.stderr,
+                                        )
                                         # Explicitly convert after resize to ensure a common mode
                                         if img.mode == 'P':
                                             img = img.convert('RGBA' if img.info.get('transparency') is not None else 'RGB')
@@ -625,11 +646,11 @@ def arxiv_search_command(query_string: str, max_results: int, sort_by: str, deta
                     categories_str = ", ".join(paper.categories)
                     click.echo(f"    Primary Category: {primary_category if primary_category else 'N/A'}")
                     click.echo(f"    Categories: {categories_str if categories_str else 'N/A'}")
-                    click.echo(f"    Abstract: {paper.summary.replace('\n', ' ')}")
+                    click.echo("    Abstract: " + paper.summary.replace("\n", " "))
                     click.echo(f"    PDF Link: {paper.pdf_url}")
                 else:
                     brief_summary = (paper.summary[:200] + '...') if len(paper.summary) > 200 else paper.summary
-                    click.echo(f"    Abstract (brief): {brief_summary.replace('\n', ' ')}")
+                    click.echo("    Abstract (brief): " + brief_summary.replace("\n", " "))
                 click.echo("---")
 
             # After the loop, if there are commands, try to copy them all

diff --git a/markdownify.py b/markdownify.py
@@ -0,0 +1,5 @@
+def markdownify(html, **kwargs):
+    """Return *html* unchanged; any conversion options are ignored."""
+    passthrough = html
+    return passthrough
+
diff --git a/tests/test_arxiv.py b/tests/test_arxiv.py
@@ -409,6 +409,45 @@ def mock_specific_save(buffer, format, optimize=None, quality=None, **kwargs):
     mock_image_open.call_count == 2 # Pillow should be called for each selected image
 
 
+@patch("llm_arxiv.Image.open")
+@patch("llm_arxiv.arxiv.Search")
+@patch("llm_arxiv.fitz.open")
+def test_process_arxiv_paper_resize(mock_fitz_open, mock_search_class, mock_image_open):
+    """A 1500x500 image capped at 512 must be resized to exactly 512x171."""
+    mock_search_instance = MagicMock()
+    mock_paper = MagicMock(spec=arxiv.Result)
+    mock_paper.entry_id = "http://arxiv.org/abs/9999.9999v1"
+    mock_paper.download_pdf.return_value = "/tmp/9999.9999.pdf"
+    mock_search_instance.results.return_value = iter([mock_paper])
+    mock_search_class.return_value = mock_search_instance
+
+    mock_doc = MagicMock()
+    mock_page = MagicMock()
+    mock_page.get_text.return_value = "Page text <img src='p1i1'>"
+    mock_page.get_images.return_value = [(42,)]
+    mock_doc.__iter__.return_value = iter([mock_page])
+    mock_doc.extract_image.return_value = {"image": b"img_bytes", "ext": "png"}
+    mock_doc.__enter__.return_value = mock_doc
+    mock_doc.__exit__.return_value = None
+    mock_fitz_open.return_value = mock_doc
+
+    mock_pil_image = MagicMock()
+    mock_pil_image.width = 1500
+    mock_pil_image.height = 500
+    mock_pil_image.mode = "RGB"
+    mock_image_open.return_value = mock_pil_image
+
+    markdown_text, attachments, _ = _process_arxiv_paper(
+        "9999.9999",
+        image_selection_criteria={"mode": "all"},
+        resize_option=True,
+    )
+
+    mock_pil_image.resize.assert_called_once()
+    args, _kwargs = mock_pil_image.resize.call_args
+    assert args[0] == (512, 171)  # 512 * 500 / 1500 = 170.67: round() -> 171, int() -> 170
+
+
 # --- Tests for CLI Commands --- pytest.py tests/test_arxiv.py
 
 # Helper to invoke LLM CLI commands