Skip to content

Commit b89d749

Browse files
authored
fix: improve filter compat (acryldata#36)
1 parent 8a93f22 commit b89d749

File tree

2 files changed

+60
-19
lines changed

2 files changed

+60
-19
lines changed

src/mcp_server_datahub/mcp_server.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from datahub.ingestion.graph.client import DataHubGraph
2222
from datahub.sdk.main_client import DataHubClient
2323
from datahub.sdk.search_client import compile_filters
24-
from datahub.sdk.search_filters import Filter, FilterDsl
24+
from datahub.sdk.search_filters import Filter, FilterDsl, load_filters
2525
from datahub.utilities.ordered_set import OrderedSet
2626
from fastmcp import FastMCP
2727
from pydantic import BaseModel
@@ -194,6 +194,10 @@ def get_entity(urn: str) -> dict:
194194
After the final search is performed, you'll want to use the other tools to get more details about the relevant entities.
195195
196196
Here are some example filters:
197+
- All Looker assets
198+
```
199+
{"platform": ["looker"]}
200+
```
197201
- Production environment warehouse assets
198202
```
199203
{
@@ -218,11 +222,26 @@ def get_entity(urn: str) -> dict:
218222
@async_background
219223
def search(
220224
query: str = "*",
221-
filters: Optional[Filter] = None,
225+
filters: Optional[Filter | str] = None,
222226
num_results: int = 10,
223227
) -> dict:
224228
client = get_datahub_client()
225229

230+
# As of 2025-07-25: Our Filter type is a tagged/discriminated union.
231+
#
232+
# We've observed that some tools (e.g. Cursor) don't support discriminated
233+
# unions in their JSON schema validation, and hence reject valid tool calls
234+
# before they're even passed to our MCP server.
235+
# Beyond that, older LLMs (e.g. Claude Desktop w/ Sonnet 3.5) have a tendency
236+
# to pass tool args as JSON-encoded strings instead of proper objects.
237+
#
238+
# To work around these issues, we allow stringified JSON filters that we
239+
# parse on our end. The FastMCP library used to have built-in support for
240+
# handling this, but removed it in
241+
# https://github.com/jlowin/fastmcp/commit/7b9696405b1427f4dc5430891166286744b3dab5
242+
if isinstance(filters, str):
243+
# The Filter type already has a BeforeValidator that parses JSON strings.
244+
filters = load_filters(filters)
226245
types, compiled_filters = compile_filters(filters)
227246
variables = {
228247
"query": query,

tests/test_mcp_server.py

Lines changed: 39 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,45 @@ async def test_search_no_results() -> None:
6161
assert list(res.keys()) == ["total", "facets"]
6262

6363

64+
@pytest.mark.anyio
65+
async def test_search_simple_filter(mcp_client: Client) -> None:
66+
filters_json = {"platform": ["looker"]}
67+
res = await mcp_client.call_tool(
68+
"search",
69+
arguments={"query": "*", "filters": filters_json},
70+
)
71+
assert res.is_error is False
72+
assert res.data is not None
73+
74+
75+
@pytest.mark.anyio
76+
async def test_search_string_filter(mcp_client: Client) -> None:
77+
filters_json = {"platform": ["looker"]}
78+
res = await mcp_client.call_tool(
79+
"search",
80+
arguments={"query": "*", "filters": json.dumps(filters_json)},
81+
)
82+
assert res.is_error is False
83+
assert res.data is not None
84+
85+
86+
@pytest.mark.anyio
87+
async def test_search_complex_filter(mcp_client: Client) -> None:
88+
filters_json = {
89+
"and": [
90+
{"entity_type": ["DATASET"]},
91+
{"entity_subtype": ["Table"]},
92+
{"not": {"platform": ["snowflake"]}},
93+
]
94+
}
95+
res = await mcp_client.call_tool(
96+
"search",
97+
arguments={"query": "*", "filters": filters_json},
98+
)
99+
assert res.is_error is False
100+
assert res.data is not None
101+
102+
64103
@pytest.mark.anyio
65104
async def test_get_dataset() -> None:
66105
res = await get_entity.fn(_test_urn)
@@ -90,20 +129,3 @@ async def test_get_lineage() -> None:
90129
async def test_get_dataset_queries() -> None:
91130
res = await get_dataset_queries.fn(_test_urn)
92131
assert res is not None
93-
94-
95-
@pytest.mark.anyio
96-
async def test_search(mcp_client: Client) -> None:
97-
filters_json = {
98-
"and": [
99-
{"entity_type": ["DATASET"]},
100-
{"entity_subtype": ["Table"]},
101-
{"not": {"platform": ["snowflake"]}},
102-
]
103-
}
104-
res = await mcp_client.call_tool(
105-
"search",
106-
arguments={"query": "*", "filters": filters_json},
107-
)
108-
assert res.is_error is False
109-
assert res.data is not None

0 commit comments

Comments
 (0)