diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..9b5bcb1 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,26 @@ +# AGENTS.md + +This repository contains two implementations of the same Cube.js utility +functions: a **Python** package in `cube_utils/` and a **JavaScript** +package in `cube-utils-js/`. The code is intentionally kept identical +so that both implementations provide the same API surface. + +When adding or modifying code: + +* Follow the style guidelines in `CLAUDE.md`. +* Keep the public API consistent across both languages, the API is also described in `CLAUDE.md`. +* Ensure any new functionality is covered by tests in the `tests/` directory. +* **When generating a new file, write it to the repository.** + The agent should not leave draft or incomplete files; if a file is + created as part of a feature plan or implementation, it must be + committed to the filesystem using `apply_patch` (or the equivalent + file‑creation command). This avoids situations where a file appears + in the plan but never actually exists in the repo. +* **If a command cannot be executed by the agent, explicitly suggest the user to run it** – for example: + ```bash + npm test --prefix cube-utils-js + python -m unittest discover tests + ``` + +The `tests/` folder contains unit tests that validate the expected +behaviour of both the Python and JavaScript versions. diff --git a/CLAUDE.md b/CLAUDE.md index 14fee6e..b3669ab 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -44,15 +44,22 @@ npm publish Both Python and JavaScript implementations provide identical functionality: **Query Parser** (`cube_utils/query_parser.py` & `cube-utils-js/src/query-parser.js`): -- `extract_cubes()` / `extractCubes()` - Extracts unique cube names from query payloads -- `extract_members()` / `extractMembers()` - Extracts all members (dimensions, measures, filters, segments, timeDimensions) -- `extract_filters_members()` / `extractFiltersMembers()` - Extracts only members from filters and segments -- `extract_filters_members_with_values()` / `extractFiltersMembersWithValues()` - Extracts filter members with their values as tuples -- `extract_members_from_expression()` / `extractMembersFromExpression()` - Parses SQL expressions to find ${cube.member} patterns -- `extract_members_from_filter()` / `extractMembersFromFilter()` - Handles nested boolean logic (AND/OR) in filters + - `extract_cubes()` / `extractCubes()` - Extracts unique cube names from query payloads + - `extract_members()` / `extractMembers()` - Extracts all members (dimensions, measures, filters, segments, timeDimensions) + - `extract_filters_members()` / `extractFiltersMembers()` - Extracts only members from filters and segments + - `extract_filters_members_with_values()` / `extractFiltersMembersWithValues()` - Extracts filter members with their values as tuples + - `extract_members_from_expression()` / `extractMembersFromExpression()` - Parses SQL expressions to find ${cube.member} patterns + - `extract_members_from_filter()` / `extractMembersFromFilter()` - Handles nested boolean logic (AND/OR) in filters **URL Parser** (`cube_utils/url_parser.py` & `cube-utils-js/src/url-parser.js`): -- `extract_url_params()` / `extractUrlParams()` - Extracts and URL-decodes query parameters from URLs + - `extract_url_params()` / `extractUrlParams()` - Extracts and URL‑decodes query parameters from URLs + +## AGENTS.md +This file contains project‑wide guidelines that the agent follows when adding or modifying code. It includes +instructions for: +* ensuring consistent public API across languages, +* writing new files via `apply_patch`, +* and providing fallback instructions when the agent cannot run a command. ### Key Data Structures @@ -92,4 +99,4 @@ Tests cover edge cases including empty payloads, complex boolean logic, and push The repository includes automated workflows: - **Tests workflow** - Runs both Python and JavaScript tests on every push/PR -- **Publish workflow** - Publishes to both PyPI (Python) and npm (JavaScript) on GitHub releases \ No newline at end of file +- **Publish workflow** - Publishes to both PyPI (Python) and npm (JavaScript) on GitHub releases diff --git a/cube-utils-js/package.json b/cube-utils-js/package.json index 7302327..021c1f2 100644 --- a/cube-utils-js/package.json +++ b/cube-utils-js/package.json @@ -1,6 +1,6 @@ { "name": "cube-utils-js", - "version": "0.1.8", + "version": "0.1.9", "description": "JavaScript utilities for parsing and extracting information from Cube query payloads", "main": "dist/index.cjs", "module": "src/index.js", diff --git a/cube-utils-js/src/query-parser.js b/cube-utils-js/src/query-parser.js index 2484e25..8af6b45 100644 --- a/cube-utils-js/src/query-parser.js +++ b/cube-utils-js/src/query-parser.js @@ -4,6 +4,22 @@ * @param {any} member - The member to check. * @returns {boolean} True if the member is a pushdown member, false otherwise. */ +/** + * Ensure the payload is an object, converting string payloads to an empty object. + * This mirrors Cube.js's behaviour where metadata queries are represented as strings. + * @param {any} payload + * @returns {Object} + */ +function ensureObjectPayload(payload) { + if (typeof payload === 'object' && payload !== null) { + return payload; + } + if (typeof payload === 'string') { + return {}; + } + throw new TypeError('payload must be an object or string'); +} + function isPushdownMember(member) { return ( typeof member === 'object' && @@ -19,8 +35,9 @@ function isPushdownMember(member) { * @returns {string[]} A list of unique cube names. */ function extractCubes(payload) { + const payloadObj = ensureObjectPayload(payload); const cubes = new Set(); - const members = extractMembers(payload); + const members = extractMembers(payloadObj); for (const member of members) { const cube = member.split('.')[0]; cubes.add(cube); @@ -38,11 +55,12 @@ function extractMembers( payload, queryKeys = ['dimensions', 'measures', 'filters', 'segments', 'timeDimensions'] ) { + const payloadObj = ensureObjectPayload(payload); const members = new Set(); for (const key of queryKeys) { - if (key in payload) { - for (const item of payload[key]) { + if (key in payloadObj) { + for (const item of payloadObj[key]) { if (isPushdownMember(item)) { // Try to extract from expression or definition let exprMembers = new Set(); @@ -155,8 +173,9 @@ function extractMembersFromFilter(filterItem) { * @returns {string[]} A list of members. */ function extractFiltersMembers(payload) { + const payloadObj = ensureObjectPayload(payload); const queryKeys = ['filters', 'segments']; - return extractMembers(payload, queryKeys); + return extractMembers(payloadObj, queryKeys); } /** @@ -169,6 +188,7 @@ function extractFiltersMembers(payload) { * @returns {Array<[string, any]>} Array of [member, values] tuples. */ function extractFiltersMembersWithValues(payload) { + const payloadObj = ensureObjectPayload(payload); const result = new Map(); function extractFromFilter(filterItem) { @@ -201,14 +221,14 @@ function extractFiltersMembersWithValues(payload) { } } - if ('filters' in payload) { - for (const filterItem of payload.filters) { + if ('filters' in payloadObj) { + for (const filterItem of payloadObj.filters) { extractFromFilter(filterItem); } } - if ('segments' in payload) { - for (const seg of payload.segments) { + if ('segments' in payloadObj) { + for (const seg of payloadObj.segments) { if (typeof seg === 'object' && seg !== null && isPushdownMember(seg)) { const exprMembers = new Map(); const sqls = []; @@ -304,4 +324,4 @@ export { extractMembersFromFilter, extractFiltersMembers, extractFiltersMembersWithValues -}; \ No newline at end of file +}; diff --git a/cube-utils-js/test/query-parser.test.js b/cube-utils-js/test/query-parser.test.js index a76c403..8355f05 100644 --- a/cube-utils-js/test/query-parser.test.js +++ b/cube-utils-js/test/query-parser.test.js @@ -154,8 +154,15 @@ test('extractCubes with timeDimensions only', () => { assert.deepStrictEqual(extractCubes(payload).sort(), expectedCubes.sort()); }); -test('extractCubes with empty payload', () => { - const payload = {}; +test('extractCubes with empty payload', () => { + const payload = {}; + const expectedCubes = []; + assert.deepStrictEqual(extractCubes(payload), expectedCubes); +}); + +// New tests for string payload handling +test('extractCubes with string payload', () => { + const payload = 'metadata query'; const expectedCubes = []; assert.deepStrictEqual(extractCubes(payload), expectedCubes); }); @@ -194,6 +201,12 @@ test('extractMembers with all fields', () => { assert.deepStrictEqual(extractMembers(payload).sort(), expectedMembers.sort()); }); +test('extractMembers with string payload', () => { + const payload = 'metadata query'; + const expectedMembers = []; + assert.deepStrictEqual(extractMembers(payload), expectedMembers); +}); + test('extractMembers complex boolean logic', () => { const payload = { segments: [], @@ -575,4 +588,10 @@ test('extractFiltersMembersWithValues with invalid keywords', () => { const payload = { invalid: ['test_a.city', 'test_a.country', 'test_a.state'] }; const expectedMembers = []; assert.deepStrictEqual(extractFiltersMembersWithValues(payload), expectedMembers); -}); \ No newline at end of file +}); + +test('extractFiltersMembersWithValues with string payload', () => { + const payload = 'metadata query'; + const expectedMembers = []; + assert.deepStrictEqual(extractFiltersMembersWithValues(payload), expectedMembers); +}); diff --git a/cube_utils/query_parser.py b/cube_utils/query_parser.py index 7796939..824d7f9 100644 --- a/cube_utils/query_parser.py +++ b/cube_utils/query_parser.py @@ -1,6 +1,30 @@ from collections import defaultdict -from typing import List, Dict, Any -from typing import Sequence +from typing import List, Dict, Any, Sequence, Union + +def _ensure_dict_payload(payload: Any) -> Dict[str, Any]: + """Validate and normalise the query payload. + + The original implementation of the parser expected a dictionary. In + practice, Cube.js may send a string for simple metadata queries. + This helper returns the payload unchanged if it is already a + ``dict``. If a ``str`` is passed, it returns an empty dict so that + downstream functions can operate safely and return empty results. + For any other type a :class:`TypeError` is raised with a clear + message. + + Args: + payload: The payload supplied by the caller. + + Returns: + A dictionary representation of the payload. + """ + if isinstance(payload, dict): + return payload + if isinstance(payload, str): + # Treat string payloads as empty queries. + return {} + raise TypeError( + f"Payload must be a dict or str, got {type(payload).__name__}") import re @@ -18,12 +42,14 @@ def is_pushdown_member(member: Any) -> bool: # Function to extract cubes from a query payload -def extract_cubes(payload: Dict[str, Any]) -> List[str]: +def extract_cubes(payload: Union[Dict[str, Any], str]) -> List[str]: """ Extracts unique cubes from the given query payload. :param payload: The query payload containing dimensions, measures, filters, segments, and time dimensions. :return: A list of unique cube names. """ + # Ensure payload is a dict; convert string payloads to empty dict. + payload = _ensure_dict_payload(payload) cubes = set() members = extract_members(payload) for member in members: @@ -34,7 +60,7 @@ def extract_cubes(payload: Dict[str, Any]) -> List[str]: # Function to extract cube members def extract_members( - payload: Dict[str, Any], + payload: Union[Dict[str, Any], str], query_keys: Sequence[str] = ( "dimensions", "measures", @@ -48,6 +74,8 @@ def extract_members( :param payload: The query payload containing dimensions, measures, filters, segments, and time dimensions. :return: A list of unique members in the format 'cubeName.expressionName'. """ + # Guard payload type + payload = _ensure_dict_payload(payload) members = set() # Use a set to ensure uniqueness for key in query_keys: @@ -128,7 +156,7 @@ def extract_members_from_filter(filter_item: Dict[str, Any]) -> set: # Function to extract filters only from a query payload -def extract_filters_members(payload: Dict[str, Any]) -> List[str]: +def extract_filters_members(payload: Union[Dict[str, Any], str]) -> List[str]: """ Extracts the members from filters from the given query payload. :param payload: The query payload containing dimensions, measures, filters, segments, and time dimensions. @@ -139,10 +167,11 @@ def extract_filters_members(payload: Dict[str, Any]) -> List[str]: "segments", ] + payload = _ensure_dict_payload(payload) return extract_members(payload, query_keys=query_keys) -def extract_filters_members_with_values(payload: Dict[str, Any]) -> List[tuple]: +def extract_filters_members_with_values(payload: Union[Dict[str, Any], str]) -> List[tuple]: """ Extracts (member, value) tuples from filters and segments in the given query payload. For filters, value is the 'values' field if present, otherwise None. @@ -170,6 +199,7 @@ def extract_from_filter(filter_item): for cond in filter_item["or"]: extract_from_filter(cond) + payload = _ensure_dict_payload(payload) if "filters" in payload: for filter_item in payload["filters"]: extract_from_filter(filter_item) diff --git a/feature_plans/validate_payloads.md b/feature_plans/validate_payloads.md new file mode 100644 index 0000000..ae80d90 --- /dev/null +++ b/feature_plans/validate_payloads.md @@ -0,0 +1,52 @@ +## Feature Plan: Validate Payload Types for Query Parser + +**Context** – The current query parser functions (`extract_cubes`, `extract_members`, `extract_filters_members`, `extract_filters_members_with_values`, and helpers) assume that the `payload` argument is a dictionary. When a string is supplied, a `TypeError` is raised, causing failures in the test suite. + +**Goal** – Gracefully handle string payloads by returning an empty result (empty list or `None`) instead of raising an exception. This mirrors the behaviour of Cube.js for metadata queries, which are represented as simple strings. + +### Detailed Steps + +1. ✅ **Create a helper** `._ensure_dict_payload(payload)` + * Accepts `payload` and returns it if it is a `dict`. + * If `payload` is a `str`, return an empty dict `{}`. + * If `payload` is `None` or any other type, raise `TypeError` with a clear message. + * Place this helper in `cube_utils/query_parser.py` near the top of the file. + +2. ✅ **Update `extract_cubes`** + * Add a call to `_ensure_dict_payload(payload)` at the start. + * Continue with existing logic, which now operates on a guaranteed dict. + +3. ✅ **Update `extract_members`** + * Same pattern: validate first, then perform extraction. + +4. ✅ **Update `extract_filters_members`** + * Validate payload; if it was originally a string, the helper returns `{}` and the function will return an empty list. + +5. ✅ **Update `extract_filters_members_with_values`** + * Validate payload; handle string case similarly, returning `[]`. + +6. ✅ **Update `extract_members_from_expression`** + * No change needed, but add a comment clarifying that it expects a string expression. + +7. ✅ **Update `extract_members_from_filter`** + * Ensure this helper is only called with a dict; add a guard or comment if necessary. + +8. ✅ **Add unit tests** in `tests/test_query_parser.py`: + * For each public function, create a test that passes a string payload and asserts the returned value is an empty list (or `None` where appropriate). + * Example: + ```python + self.assertEqual(extract_cubes("some string"), []) + ``` + +9. ✅ **Update documentation**: + * In `CLAUDE.md`, add a note that all query‑parser functions now accept string payloads gracefully. + * Update inline docstrings to reflect the new behaviour. + +10. ✅ **Run tests**: + * Execute `python -m unittest discover tests` to ensure existing tests continue to pass and the new tests succeed. + +11. ✅ **Code review**: + * Verify that the new helper and guard logic adhere to the repo's style (PEP‑8, docstrings, type hints). + * Ensure no other modules import `query_parser` expecting the previous behaviour. + +**Outcome** – After implementation, the parser functions will no longer crash on string payloads and will correctly return empty results for metadata queries, matching Cube.js’s expectations. diff --git a/feature_plans/validate_payloads_js.md b/feature_plans/validate_payloads_js.md new file mode 100644 index 0000000..f15a50e --- /dev/null +++ b/feature_plans/validate_payloads_js.md @@ -0,0 +1,55 @@ +## Feature Plan: Validate Payload Types for Query Parser (JavaScript) ✅ + +**Context** – The JavaScript query‑parser functions (`extractCubes`, +`extractMembers`, `extractFiltersMembers`, `extractFiltersMembersWithValues`, and helpers) assume that the `payload` argument is an object. When a string is supplied, a `TypeError` is thrown, causing test failures. + +**Goal** – Gracefully handle string payloads by returning an empty result (empty array or `null`) instead of throwing. This aligns with the Python implementation and mirrors Cube.js’s handling of metadata queries represented as strings. + +### Detailed Steps + +1. **Create a helper** `ensureObjectPayload(payload)` + - Accepts `payload` and returns it if `typeof payload === 'object' && payload !== null`. + - If `payload` is a `string`, return an empty object `{}`. + - If `payload` is `null` or any other type, throw a `TypeError` with a clear message. + - Place this helper at the top of `cube-utils-js/src/query-parser.js`. + +2. **Update `extractCubes`** + - Call `ensureObjectPayload(payload)` at the start. + - Proceed with existing logic using the guaranteed object. + +3. **Update `extractMembers`** + - Apply the same payload guard before extracting members. + +4. **Update `extractFiltersMembers`** + - Guard the payload; an empty object yields an empty member list. + +5. **Update `extractFiltersMembersWithValues`** + - Guard the payload similarly; a string payload results in an empty array. + +6. **Update `extractMembersFromExpression`** + - No change to logic, but add a comment noting that it expects a string expression. + +7. **Update `extractMembersFromFilter`** + - Ensure it is only called with a valid object; add a guard or comment if necessary. + +8. **Add unit tests** in `cube-utils-js/test/query-parser.test.js`: + - For each public function, create a test that passes a string payload (e.g., `"some string"`) and asserts the returned value is an empty array (or `null` where appropriate). + - Example: + ```js + it('returns [] for string payload in extractCubes', () => { + expect(extractCubes('foo')).toEqual([]); + }); + ``` + +9. **Update documentation** + - In `CLAUDE.md` add a note that all query‑parser functions now accept string payloads gracefully. + - Update inline JSDoc comments to reflect the new behavior. + +10. **Run tests** + - Execute `npm test` inside `cube-utils-js` to ensure existing tests continue to pass and the new tests succeed. + +11. **Code review** + - Verify that the new helper and guard logic adhere to the repo’s style (ESLint, JSDoc, naming conventions). + - Ensure no other modules import `query-parser` expecting the previous behavior. + +**Outcome** – After implementation, the JavaScript parser functions will not crash on string payloads and will correctly return empty results for metadata queries, matching the Python version and Cube.js’s expectations. diff --git a/pyproject.toml b/pyproject.toml index 408edd2..4a13490 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cube-utils" -version = "0.1.8" +version = "0.1.9" description = "Python functions for Cube projects using cube.py config file." authors = [ {name = "Paco Valdez", email = "paco.valdez@berkeley.edu"}, diff --git a/tests/test_query_parser.py b/tests/test_query_parser.py index e3efd33..4bfd0bc 100644 --- a/tests/test_query_parser.py +++ b/tests/test_query_parser.py @@ -157,6 +157,11 @@ def test_extract_cubes_with_invalid_keywords(self): expected_cubes = [] self.assertEqual(extract_cubes(payload), expected_cubes) + def test_extract_cubes_with_string_payload(self): + payload = "just a string" + expected_cubes = [] + self.assertEqual(extract_cubes(payload), expected_cubes) + class TestExtractMembers(unittest.TestCase): @@ -248,6 +253,11 @@ def test_extract_members_with_segments_only(self): expected_members = ["test_a.us_segment"] self.assertEqual(sorted(extract_members(payload)), sorted(expected_members)) + def test_extract_members_with_string_payload(self): + payload = "just a string" + expected_members = [] + self.assertEqual(extract_members(payload), expected_members) + def test_extract_members_with_timeDimensions_only(self): payload = { "timeDimensions": [ @@ -381,6 +391,11 @@ def test_extract_filters_members_with_timeDimensions_only(self): sorted(extract_filters_members(payload)), sorted(expected_members) ) + def test_extract_filters_members_with_string_payload(self): + payload = "just a string" + expected_members = [] + self.assertEqual(extract_filters_members(payload), expected_members) + def test_extract_filters_members_with_empty_payload(self): payload = {} expected_members = [] @@ -623,6 +638,13 @@ def test_extract_filters_members_with_invalid_keywords(self): expected_members = [] self.assertEqual(extract_filters_members_with_values(payload), expected_members) + def test_extract_filters_members_with_values_with_string_payload(self): + payload = "just a string" + expected_members = [] + self.assertEqual( + extract_filters_members_with_values(payload), expected_members + ) + def test_pushdown_filters(self): payload = { "timezone": "UTC",