From 6de3c0b02df16e39ae95f676f952fedd78e8ffdc Mon Sep 17 00:00:00 2001 From: Eric Gustin Date: Tue, 20 May 2025 13:42:09 -0700 Subject: [PATCH 1/4] Add Arcade Expert Toolkit --- .../arcade_expert/.pre-commit-config.yaml | 18 ++++++ toolkits/arcade_expert/.ruff.toml | 44 +++++++++++++ toolkits/arcade_expert/LICENSE | 21 +++++++ toolkits/arcade_expert/Makefile | 58 +++++++++++++++++ .../arcade_arcade_expert/__init__.py | 0 .../arcade_arcade_expert/models.py | 19 ++++++ .../arcade_arcade_expert/tools/__init__.py | 3 + .../tools/search_documentation.py | 61 ++++++++++++++++++ .../arcade_expert/evals/eval_arcade_expert.py | 62 +++++++++++++++++++ toolkits/arcade_expert/pyproject.toml | 42 +++++++++++++ toolkits/arcade_expert/tests/__init__.py | 0 toolkits/arcade_expert/tests/test_models.py | 34 ++++++++++ toolkits/arcade_expert/tox.ini | 16 +++++ 13 files changed, 378 insertions(+) create mode 100644 toolkits/arcade_expert/.pre-commit-config.yaml create mode 100644 toolkits/arcade_expert/.ruff.toml create mode 100644 toolkits/arcade_expert/LICENSE create mode 100644 toolkits/arcade_expert/Makefile create mode 100644 toolkits/arcade_expert/arcade_arcade_expert/__init__.py create mode 100644 toolkits/arcade_expert/arcade_arcade_expert/models.py create mode 100644 toolkits/arcade_expert/arcade_arcade_expert/tools/__init__.py create mode 100644 toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py create mode 100644 toolkits/arcade_expert/evals/eval_arcade_expert.py create mode 100644 toolkits/arcade_expert/pyproject.toml create mode 100644 toolkits/arcade_expert/tests/__init__.py create mode 100644 toolkits/arcade_expert/tests/test_models.py create mode 100644 toolkits/arcade_expert/tox.ini diff --git a/toolkits/arcade_expert/.pre-commit-config.yaml b/toolkits/arcade_expert/.pre-commit-config.yaml new file mode 100644 index 000000000..3953e996e --- /dev/null +++ b/toolkits/arcade_expert/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +files: ^./ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: "v4.4.0" + hooks: + - id: check-case-conflict + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.7 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format diff --git a/toolkits/arcade_expert/.ruff.toml b/toolkits/arcade_expert/.ruff.toml new file mode 100644 index 000000000..9519fe6c3 --- /dev/null +++ b/toolkits/arcade_expert/.ruff.toml @@ -0,0 +1,44 @@ +target-version = "py310" +line-length = 100 +fix = true + +[lint] +select = [ + # flake8-2020 + "YTT", + # flake8-bandit + "S", + # flake8-bugbear + "B", + # flake8-builtins + "A", + # flake8-comprehensions + "C4", + # flake8-debugger + "T10", + # flake8-simplify + "SIM", + # isort + "I", + # mccabe + "C90", + # pycodestyle + "E", "W", + # pyflakes + "F", + # pygrep-hooks + "PGH", + # pyupgrade + "UP", + # ruff + "RUF", + # tryceratops + "TRY", +] + +[lint.per-file-ignores] +"**/tests/*" = ["S101"] + +[format] +preview = true +skip-magic-trailing-comma = false diff --git a/toolkits/arcade_expert/LICENSE b/toolkits/arcade_expert/LICENSE new file mode 100644 index 000000000..8c2d4f375 --- /dev/null +++ b/toolkits/arcade_expert/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025, Arcade + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/toolkits/arcade_expert/Makefile b/toolkits/arcade_expert/Makefile new file mode 100644 index 000000000..c8e999003 --- /dev/null +++ b/toolkits/arcade_expert/Makefile @@ -0,0 +1,58 @@ +.PHONY: help + +help: + @echo "🛠️ arcade_expert Commands:\n" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: install +install: ## Install the poetry environment and install the pre-commit hooks + @echo "📦 Checking if Poetry is installed" + @if ! command -v poetry >/dev/null 2>&1; then \ + echo "📦 Poetry not found. Checking if pip is available"; \ + if ! command -v pip >/dev/null 2>&1; then \ + echo "❌ pip is not installed. Please install pip first."; \ + exit 1; \ + fi; \ + echo "📦 Installing Poetry with pip"; \ + pip install poetry==1.8.5; \ + else \ + echo "📦 Poetry is already installed"; \ + fi + @echo "🚀 Installing package in development mode with all extras" + poetry install --all-extras + +.PHONY: build +build: clean-build ## Build wheel file using poetry + @echo "🚀 Creating wheel file" + poetry build + +.PHONY: clean-build +clean-build: ## clean build artifacts + @echo "🗑️ Cleaning dist directory" + rm -rf dist + +.PHONY: test +test: ## Test the code with pytest + @echo "🚀 Testing code: Running pytest" + @poetry run pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml + +.PHONY: coverage +coverage: ## Generate coverage report + @echo "coverage report" + coverage report + @echo "Generating coverage report" + coverage html + +.PHONY: bump-version +bump-version: ## Bump the version in the pyproject.toml file + @echo "🚀 Bumping version in pyproject.toml" + poetry version patch + +.PHONY: check +check: ## Run code quality tools. + @echo "🚀 Checking Poetry lock file consistency with 'pyproject.toml': Running poetry check" + @poetry check + @echo "🚀 Linting code: Running pre-commit" + @poetry run pre-commit run -a + @echo "🚀 Static type checking: Running mypy" + @poetry run mypy --config-file=pyproject.toml diff --git a/toolkits/arcade_expert/arcade_arcade_expert/__init__.py b/toolkits/arcade_expert/arcade_arcade_expert/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/toolkits/arcade_expert/arcade_arcade_expert/models.py b/toolkits/arcade_expert/arcade_arcade_expert/models.py new file mode 100644 index 000000000..36aee054f --- /dev/null +++ b/toolkits/arcade_expert/arcade_arcade_expert/models.py @@ -0,0 +1,19 @@ +import validators +from pydantic import BaseModel + + +class Links(BaseModel): + links: list[str] + + def __iter__(self): + return iter(self.links) + + def validate_links(self) -> None: + """Validate links, removing any invalid ones""" + valid_links = [] + + for link in self.links: + if validators.url(link): + valid_links.append(link) + + self.links = valid_links diff --git a/toolkits/arcade_expert/arcade_arcade_expert/tools/__init__.py b/toolkits/arcade_expert/arcade_arcade_expert/tools/__init__.py new file mode 100644 index 000000000..68cf121da --- /dev/null +++ b/toolkits/arcade_expert/arcade_arcade_expert/tools/__init__.py @@ -0,0 +1,3 @@ +from arcade_arcade_expert.tools.search_documentation import search_documentation + +__all__ = ["search_documentation"] diff --git a/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py new file mode 100644 index 000000000..03d506ab6 --- /dev/null +++ b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py @@ -0,0 +1,61 @@ +from typing import Annotated + +import httpx +from arcade.sdk import ToolContext, tool +from markdownify import markdownify +from openai import OpenAI + +from arcade_arcade_expert.models import Links + + +@tool(requires_secrets=["OPENAI_API_KEY"]) +async def search_documentation( + context: ToolContext, + query: Annotated[str, "The query to use to search for relevant Arcade.dev documentation"], +) -> Annotated[str, "The answer to the query"]: + """Search Arcade.dev's documentation for the content of pages that are relevant to the query. + + Arcade.dev securely connects your AI to APIs, data, code, and other systems. + + Arcade is an AI Tool-calling Platform. For the first time, AI can securely act on behalf + of users through Arcade's authenticated integrations, or "tools" in AI lingo. Connect AI + to email, files, calendars, and APIs to build assistants that don't just chat - they get + work done. Start building in minutes with our pre-built connectors or custom SDK. + """ + openai_api_key = context.get_secret("OPENAI_API_KEY") + openai_client = OpenAI(api_key=openai_api_key) + + # Get Arcade.dev documentation's llms.txt file + url = "https://docs.arcade.dev/llms.txt" + async with httpx.AsyncClient() as client: + response = await client.get(url) + response.raise_for_status() + data = markdownify(response.text) + + # Get relevant links from the llms.txt file + response = openai_client.beta.chat.completions.parse( + model="gpt-4o", + messages=[ + { + "role": "system", + "content": ( + "Provided a query, you are an expert at selecting the most relevant URLs " + "from a list of URLs. You return at most 5 URLs." + ), + }, + {"role": "user", "content": f"Question: {query}\n\nCandidate Links: {data}"}, + ], + response_format=Links, + ) + links = response.choices[0].message.parsed + + # Get the content of the relevant links + documentation_content: list[str] = [] + async with httpx.AsyncClient() as client: + for link in links: + response = await client.get(link) + response.raise_for_status() + documentation_content.append(markdownify(response.text)) + + links_str = "\n".join(links) + return "\n\n".join(documentation_content) + f"\n\nSources: {links_str}" diff --git a/toolkits/arcade_expert/evals/eval_arcade_expert.py b/toolkits/arcade_expert/evals/eval_arcade_expert.py new file mode 100644 index 000000000..36c93b0f7 --- /dev/null +++ b/toolkits/arcade_expert/evals/eval_arcade_expert.py @@ -0,0 +1,62 @@ +from arcade.sdk import ToolCatalog +from arcade.sdk.eval import ( + EvalRubric, + EvalSuite, + ExpectedToolCall, + SimilarityCritic, + tool_eval, +) + +import arcade_arcade_expert +from arcade_arcade_expert.tools import search_documentation + +# Evaluation rubric +rubric = EvalRubric( + fail_threshold=0.85, + warn_threshold=0.95, +) + + +catalog = ToolCatalog() +catalog.add_module(arcade_arcade_expert) + + +@tool_eval() +def arcade_expert_eval_suite() -> EvalSuite: + suite = EvalSuite( + name="Search Documentation Tool Evaluation", + system_message="Help the user with their queries", + catalog=catalog, + rubric=rubric, + ) + + suite.add_case( + name="Finding engine.yaml location", + user_message="where is my engine.yaml file that Arcade is telling me that I need", + expected_tool_calls=[ + ExpectedToolCall( + func=search_documentation, + args={"query": "engine.yaml file location"}, + ) + ], + rubric=rubric, + critics=[ + SimilarityCritic(critic_field="query", weight=0.3, similarity_threshold=0.4), + ], + ) + + suite.extend_case( + name="Add a custom Reddit OAuth Provider", + user_message="I want to create a new Reddit OAuth Provider. How do I do this?", + expected_tool_calls=[ + ExpectedToolCall( + func=search_documentation, args={"query": "create a new Reddit OAuth Provider"} + ), + ], + rubric=rubric, + critics=[ + SimilarityCritic(critic_field="query", weight=0.3, similarity_threshold=0.4), + ], + ) + + return suite diff --git a/toolkits/arcade_expert/pyproject.toml b/toolkits/arcade_expert/pyproject.toml new file mode 100644 index 000000000..dfc5e3033 --- /dev/null +++ b/toolkits/arcade_expert/pyproject.toml @@ -0,0 +1,42 @@ +[tool.poetry] +name = "arcade_arcade_expert" +version = "0.0.1" +description = "Arcade.dev LLM tools for everything Arcade.dev related. Super meta, we know!" +authors = ["Arcade "] + +[tool.poetry.dependencies] +python = "^3.10" +arcade-ai = "^1.0.5" +httpx = "^0.28.1" +markdownify = "^1.1.0" +validators = "^0.35.0" + +[tool.poetry.dev-dependencies] +pytest = "^8.3.0" +pytest-cov = "^4.0.0" +mypy = "^1.5.1" +pre-commit = "^3.4.0" +tox = "^4.11.1" +ruff = "^0.7.4" + +[build-system] +requires = ["poetry-core>=1.0.0,<2.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.mypy] +files = ["arcade_arcade_expert/**/*.py"] +python_version = "3.10" +disallow_untyped_defs = "True" +disallow_any_unimported = "True" +no_implicit_optional = "True" +check_untyped_defs = "True" +warn_return_any = "True" +warn_unused_ignores = "True" +show_error_codes = "True" +ignore_missing_imports = "True" + +[tool.pytest.ini_options] +testpaths = ["tests"] + +[tool.coverage.report] +skip_empty = true diff --git a/toolkits/arcade_expert/tests/__init__.py b/toolkits/arcade_expert/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/toolkits/arcade_expert/tests/test_models.py b/toolkits/arcade_expert/tests/test_models.py new file mode 100644 index 000000000..512d8d359 --- /dev/null +++ b/toolkits/arcade_expert/tests/test_models.py @@ -0,0 +1,34 @@ +from arcade_arcade_expert.models import Links + + +def test_links_initialization(): + # Test that Links can be initialized with a list of links + links = Links(links=["https://example.com", "https://test.com"]) + assert links.links == ["https://example.com", "https://test.com"] + + +def test_links_iteration(): + # Test that Links is iterable + links = Links(links=["https://example.com", "https://test.com"]) + assert list(links) == ["https://example.com", "https://test.com"] + + +def test_validate_links(): + # Test that validate_links removes invalid URLs + links = Links(links=["https://example.com", "not-a-valid-url", "https://test.com"]) + links.validate_links() + assert links.links == ["https://example.com", "https://test.com"] + + +def test_validate_links_empty(): + # Test with empty list + links = Links(links=[]) + links.validate_links() + assert links.links == [] + + +def test_validate_links_all_invalid(): + # Test with all invalid links + links = Links(links=["not-valid-1", "not-valid-2"]) + links.validate_links() + assert links.links == [] diff --git a/toolkits/arcade_expert/tox.ini b/toolkits/arcade_expert/tox.ini new file mode 100644 index 000000000..fcb62a708 --- /dev/null +++ b/toolkits/arcade_expert/tox.ini @@ -0,0 +1,16 @@ +[tox] +skipsdist = true +envlist = py310, py311, py312 + +[gh-actions] +python = + 3.10: py310 + 3.11: py311 + 3.12: py312 + +[testenv] +passenv = PYTHON_VERSION +allowlist_externals = poetry +commands = + poetry install -v --all-extras + pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml From 28fc8109ca3ae1b7b31eb6272d5079d265dbf24f Mon Sep 17 00:00:00 2001 From: Eric Gustin Date: Tue, 20 May 2025 14:24:03 -0700 Subject: [PATCH 2/4] Add openai dependency --- .../arcade_arcade_expert/tools/search_documentation.py | 10 ++++++---- toolkits/arcade_expert/pyproject.toml | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py index 03d506ab6..c00e31b10 100644 --- a/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py +++ b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py @@ -52,10 +52,12 @@ async def search_documentation( # Get the content of the relevant links documentation_content: list[str] = [] async with httpx.AsyncClient() as client: + sources = [] for link in links: response = await client.get(link) - response.raise_for_status() - documentation_content.append(markdownify(response.text)) + if 200 <= response.status_code < 300: + documentation_content.append(markdownify(response.text)) + sources.append(link) - links_str = "\n".join(links) - return "\n\n".join(documentation_content) + f"\n\nSources: {links_str}" + sources_str = "\n".join(sources) + return "\n\n".join(documentation_content) + f"\n\nSources: {sources_str}" diff --git a/toolkits/arcade_expert/pyproject.toml b/toolkits/arcade_expert/pyproject.toml index dfc5e3033..4cf48116c 100644 --- a/toolkits/arcade_expert/pyproject.toml +++ b/toolkits/arcade_expert/pyproject.toml @@ -9,6 +9,7 @@ python = "^3.10" arcade-ai = "^1.0.5" httpx = "^0.28.1" markdownify = "^1.1.0" +openai = "^1.55.3" validators = "^0.35.0" [tool.poetry.dev-dependencies] From b8f34a5cb74a9cf04a7c859e0a62f0e15100497d Mon Sep 17 00:00:00 2001 From: Eric Gustin Date: Tue, 20 May 2025 14:34:50 -0700 Subject: [PATCH 3/4] Lint --- toolkits/arcade_expert/arcade_arcade_expert/models.py | 3 --- .../arcade_arcade_expert/tools/search_documentation.py | 10 +++++++--- toolkits/arcade_expert/tests/test_models.py | 6 ------ 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/toolkits/arcade_expert/arcade_arcade_expert/models.py b/toolkits/arcade_expert/arcade_arcade_expert/models.py index 36aee054f..9eb73bf44 100644 --- a/toolkits/arcade_expert/arcade_arcade_expert/models.py +++ b/toolkits/arcade_expert/arcade_arcade_expert/models.py @@ -5,9 +5,6 @@ class Links(BaseModel): links: list[str] - def __iter__(self): - return iter(self.links) - def validate_links(self) -> None: """Validate links, removing any invalid ones""" valid_links = [] diff --git a/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py index c00e31b10..db086f754 100644 --- a/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py +++ b/toolkits/arcade_expert/arcade_arcade_expert/tools/search_documentation.py @@ -4,6 +4,7 @@ from arcade.sdk import ToolContext, tool from markdownify import markdownify from openai import OpenAI +from openai.types.chat import ParsedChatCompletion from arcade_arcade_expert.models import Links @@ -33,7 +34,7 @@ async def search_documentation( data = markdownify(response.text) # Get relevant links from the llms.txt file - response = openai_client.beta.chat.completions.parse( + chat_response: ParsedChatCompletion = openai_client.beta.chat.completions.parse( model="gpt-4o", messages=[ { @@ -47,13 +48,16 @@ async def search_documentation( ], response_format=Links, ) - links = response.choices[0].message.parsed + links = chat_response.choices[0].message.parsed + + if not links: + return "No relevant documentation found." # Get the content of the relevant links documentation_content: list[str] = [] async with httpx.AsyncClient() as client: sources = [] - for link in links: + for link in links.links: response = await client.get(link) if 200 <= response.status_code < 300: documentation_content.append(markdownify(response.text)) diff --git a/toolkits/arcade_expert/tests/test_models.py b/toolkits/arcade_expert/tests/test_models.py index 512d8d359..3b5b332f2 100644 --- a/toolkits/arcade_expert/tests/test_models.py +++ b/toolkits/arcade_expert/tests/test_models.py @@ -7,12 +7,6 @@ def test_links_initialization(): assert links.links == ["https://example.com", "https://test.com"] -def test_links_iteration(): - # Test that Links is iterable - links = Links(links=["https://example.com", "https://test.com"]) - assert list(links) == ["https://example.com", "https://test.com"] - - def test_validate_links(): # Test that validate_links removes invalid URLs links = Links(links=["https://example.com", "not-a-valid-url", "https://test.com"]) From aeaadb07e7e605a82c4db8da0c8b2615b22e31aa Mon Sep 17 00:00:00 2001 From: Eric Gustin Date: Tue, 20 May 2025 14:44:14 -0700 Subject: [PATCH 4/4] Remove tox.ini --- toolkits/arcade_expert/tox.ini | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 toolkits/arcade_expert/tox.ini diff --git a/toolkits/arcade_expert/tox.ini b/toolkits/arcade_expert/tox.ini deleted file mode 100644 index fcb62a708..000000000 --- a/toolkits/arcade_expert/tox.ini +++ /dev/null @@ -1,16 +0,0 @@ -[tox] -skipsdist = true -envlist = py310, py311, py312 - -[gh-actions] -python = - 3.10: py310 - 3.11: py311 - 3.12: py312 - -[testenv] -passenv = PYTHON_VERSION -allowlist_externals = poetry -commands = - poetry install -v --all-extras - pytest --doctest-modules tests --cov --cov-config=pyproject.toml --cov-report=xml