Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions PIL/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
class Image:
    """Minimal stand-in for a Pillow image, usable where Pillow is not installed.

    Only the attributes and methods visible here are provided: ``width``,
    ``height``, ``mode``, ``info``, ``size``, ``convert``, ``load``,
    ``resize``, ``save``, ``open`` and the nested ``Resampling`` constants.
    No pixel data is stored or processed.
    """

    def __init__(self, width=0, height=0, mode='RGB'):
        """Record the dimensions and mode; ``info`` starts as an empty dict."""
        self.width = width
        self.height = height
        self.mode = mode
        self.info = {}

    @property
    def size(self):
        """Return the current dimensions as a ``(width, height)`` tuple.

        Derived from ``width``/``height`` on every access so it stays in
        step with ``resize``, which updates those attributes in place.
        """
        return (self.width, self.height)

    def convert(self, mode):
        """Set ``mode`` on this object and return the same object."""
        self.mode = mode
        return self

    def load(self):
        """Do nothing; there is no pixel data to load."""
        pass

    def resize(self, size, resample=None):
        """Set ``width``/``height`` from ``size`` and return the same object.

        ``size`` must unpack into exactly two values. ``resample`` is
        accepted and ignored.
        """
        self.width, self.height = size
        return self

    def save(self, buffer, format=None, optimize=False, quality=None, **kwargs):
        """Write an empty byte string to ``buffer``; all options are ignored."""
        buffer.write(b'')

    @staticmethod
    def open(fp):
        """Return a fresh default ``Image``; ``fp`` is not read."""
        return Image()

    class Resampling:
        """Resampling filter constants."""

        BILINEAR = 2

43 changes: 43 additions & 0 deletions arxiv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
class UnexpectedEmptyPageError(Exception):
    """Placeholder exception type; adds nothing on top of ``Exception``."""
class HTTPError(Exception):
    """Exception describing a page request that ended with an HTTP status.

    Attributes:
        url: The requested URL (defaults to an empty string).
        status: The HTTP status code (defaults to 500).
        retry: Stored as given (defaults to False).
    """

    def __init__(self, url='', status=500, retry=False):
        """Store the three values on the instance."""
        self.url, self.status, self.retry = url, status, retry

    def __str__(self):
        """Render the status and URL as a one-line message."""
        status, url = self.status, self.url
        return f"Page request resulted in HTTP {status} ({url})"

class Result:
    """Stub search result whose fields all start out empty.

    Every attribute is set per instance in ``__init__``; none exist on the
    class itself.
    """

    class Author:
        """Holds a single author's name."""

        def __init__(self, name):
            self.name = name

    def __init__(self):
        """Initialise text fields to '', date/category fields to None, lists to []."""
        self.entry_id = self.title = self.summary = ''
        self.published = self.updated = self.primary_category = None
        self.categories = []
        self.pdf_url = ''
        self.authors = []

    def download_pdf(self, dirpath=None):
        """Return an empty string; nothing is downloaded and ``dirpath`` is ignored."""
        downloaded_path = ''
        return downloaded_path

class SortCriterion:
    """Plain string constants naming the three result orderings."""

    Relevance = "relevance"
    LastUpdatedDate = "lastUpdatedDate"
    SubmittedDate = "submittedDate"

def Search(*args, **kwargs):
    """Build a stub search object whose ``results()`` is always an empty list.

    Any positional or keyword arguments are accepted and discarded; a new
    object of a freshly defined ``_Search`` class is returned on each call.
    """

    class _Search:
        """Search placeholder that yields no results."""

        def __init__(self, *a, **kw):
            # Nothing to store: constructor arguments are deliberately dropped.
            pass

        def results(self):
            """Return a new empty list."""
            return list()

    stub = _Search()
    return stub


6 changes: 6 additions & 0 deletions fitz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class Document:
    """Empty placeholder class; defines no attributes or methods."""

def open(path):
    """Always fail: this stub is meant to be replaced by a mock in tests.

    Raises:
        RuntimeError: unconditionally; ``path`` is never used.
    """
    message = 'fitz.open should be mocked in tests'
    raise RuntimeError(message)

27 changes: 27 additions & 0 deletions llm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import click

class Fragment(str):
    """A string that additionally carries a ``source`` attribute."""

    def __new__(cls, content, source=None):
        """Create the string from ``content`` and attach ``source`` to it."""
        instance = super().__new__(cls, content)
        instance.source = source
        return instance

class Attachment:
    """Stub attachment: keeps the given bytes and a ``type`` that starts as None."""

    def __init__(self, content: bytes):
        """Store ``content`` as-is and initialise ``type`` to None."""
        self.content, self.type = content, None

class UnknownModelError(Exception):
    """Placeholder exception type; adds nothing on top of ``Exception``."""

def hookimpl(func=None, **kwargs):
    """No-op decorator usable both bare and with keyword options.

    Used as ``@hookimpl`` it returns the decorated function unchanged; used
    as ``@hookimpl(...)`` it returns a decorator that does the same. The
    keyword options are accepted and ignored.
    """

    def passthrough(target):
        return target

    return passthrough if func is None else passthrough(func)

# Root command group of this stub package, created with click's group
# decorator. llm/cli.py re-exports this object via `from . import cli`.
@click.group()
def cli():
    # Intentionally empty: the group body performs no work of its own.
    pass

1 change: 1 addition & 0 deletions llm/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import cli
31 changes: 26 additions & 5 deletions llm_arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,35 @@ def _process_arxiv_paper(

if perform_resize:
if img.width > max_dim_to_use or img.height > max_dim_to_use:
print(
f"Debug BEFORE resize calc: orig_w={img.width}, orig_h={img.height}, max_dim={max_dim_to_use}",
file=sys.stderr,
)
if img.width > img.height:
value_before_int = max_dim_to_use * img.height / img.width
print(
f"Debug calc: {max_dim_to_use} * {img.height} / {img.width} = {value_before_int}",
file=sys.stderr,
)
new_width = max_dim_to_use
new_height = max(1, int(max_dim_to_use * img.height / img.width))
new_height = max(1, int(round(value_before_int)))
else:
value_before_int = max_dim_to_use * img.width / img.height
print(
f"Debug calc: {max_dim_to_use} * {img.width} / {img.height} = {value_before_int}",
file=sys.stderr,
)
new_height = max_dim_to_use
new_width = max(1, int(max_dim_to_use * img.width / img.height))
new_width = max(1, int(round(value_before_int)))
print(
f"Debug computed new_size: {new_width}x{new_height}",
file=sys.stderr,
)
img = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
print(f"Debug: Image *after* resize: Mode: {img.mode}, Size: {img.size}, Info: {img.info}", file=sys.stderr)
print(
f"Debug: Image *after* resize: Mode: {img.mode}, Size: {img.size}, Info: {img.info}",
file=sys.stderr,
)
# Explicitly convert after resize to ensure a common mode
if img.mode == 'P':
img = img.convert('RGBA' if img.info.get('transparency') is not None else 'RGB')
Expand Down Expand Up @@ -625,11 +646,11 @@ def arxiv_search_command(query_string: str, max_results: int, sort_by: str, deta
categories_str = ", ".join(paper.categories)
click.echo(f" Primary Category: {primary_category if primary_category else 'N/A'}")
click.echo(f" Categories: {categories_str if categories_str else 'N/A'}")
click.echo(f" Abstract: {paper.summary.replace('\n', ' ')}")
click.echo(" Abstract: " + paper.summary.replace("\n", " "))
click.echo(f" PDF Link: {paper.pdf_url}")
else:
brief_summary = (paper.summary[:200] + '...') if len(paper.summary) > 200 else paper.summary
click.echo(f" Abstract (brief): {brief_summary.replace('\n', ' ')}")
click.echo(" Abstract (brief): " + brief_summary.replace("\n", " "))
click.echo("---")

# After the loop, if there are commands, try to copy them all
Expand Down
3 changes: 3 additions & 0 deletions markdownify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def markdownify(html, **kwargs):
    """Return ``html`` untouched; keyword options are accepted and ignored."""
    passthrough = html
    return passthrough

39 changes: 39 additions & 0 deletions tests/test_arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,45 @@ def mock_specific_save(buffer, format, optimize=None, quality=None, **kwargs):
mock_image_open.call_count == 2 # Pillow should be called for each selected image


@patch("llm_arxiv.Image.open")
@patch("llm_arxiv.arxiv.Search")
@patch("llm_arxiv.fitz.open")
def test_process_arxiv_paper_resize(mock_fitz_open, mock_search_class, mock_image_open):
    """A 1500x500 image is resized exactly once, to 512 wide with a rounded height."""
    # arxiv search yields a single mocked paper whose PDF "downloads" to a fixed path.
    mock_search_instance = MagicMock()
    mock_paper = MagicMock(spec=arxiv.Result)
    mock_paper.entry_id = "http://arxiv.org/abs/9999.9999v1"
    mock_paper.download_pdf.return_value = "/tmp/9999.9999.pdf"
    mock_search_instance.results.return_value = iter([mock_paper])
    mock_search_class.return_value = mock_search_instance

    # One-page document containing one image, usable as a context manager.
    mock_doc = MagicMock()
    mock_page = MagicMock()
    mock_page.get_text.return_value = "Page text <img src='p1i1'>"
    mock_page.get_images.return_value = [(42,)]
    mock_doc.__iter__.return_value = iter([mock_page])
    mock_doc.extract_image.return_value = {"image": b"img_bytes", "ext": "png"}
    mock_doc.__enter__.return_value = mock_doc
    mock_doc.__exit__.return_value = None
    mock_fitz_open.return_value = mock_doc

    # Landscape image, wider than tall, so the width is the dimension that gets capped.
    mock_pil_image = MagicMock()
    mock_pil_image.width = 1500
    mock_pil_image.height = 500
    mock_pil_image.mode = "RGB"
    mock_image_open.return_value = mock_pil_image

    markdown_text, attachments, _ = _process_arxiv_paper(
        "9999.9999",
        image_selection_criteria={"mode": "all"},
        resize_option=True,
    )

    mock_pil_image.resize.assert_called_once()
    args, _kwargs = mock_pil_image.resize.call_args
    new_width, new_height = args[0]
    assert new_width == 512
    # 512 * 500 / 1500 = 170.67: rounding gives 171, plain truncation would
    # give 170. Pinning the exact value is what distinguishes the two; a
    # loose "> 1" check passes for either.
    assert new_height == 171


# --- Tests for CLI Commands --- pytest.py tests/test_arxiv.py

# Helper to invoke LLM CLI commands
Expand Down
Loading