From c16fde3c06daf095c9ec852e5900fc0ef7b1d05f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 8 Nov 2021 21:36:04 +0200 Subject: [PATCH 1/6] feat: allow cell magic body to be a $variable --- google/cloud/bigquery/magics/magics.py | 19 +++++ tests/unit/test_magics.py | 113 ++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index ec0430518..67dfa6044 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -596,6 +596,25 @@ def _cell_magic(line, query): _handle_error(error, args.destination_var) return + # Check if query is given as a reference to a variable. + if query.startswith("$"): + query_var_name = query[1:] + + if query_var_name.isidentifier(): + ip = IPython.get_ipython() + try: + query = ip.user_ns[query_var_name] + except KeyError: + raise NameError( + f"Unknown query, variable {query_var_name} does not exist." + ) from None + else: + if not isinstance(query, (str, bytes)): + raise TypeError( + f"Query variable {query_var_name} must be string " + "or bytes-like value." + ) + # Any query that does not contain whitespace (aside from leading and trailing whitespace) # is assumed to be a table id if not re.search(r"\s", query): diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 36cbf4993..7d73732e7 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -584,7 +584,7 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic_clears_display_in_verbose_mode(): +def test_bigquery_magic_clears_display_in_non_verbose_mode(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context.credentials = mock.create_autospec( @@ -1710,6 +1710,117 @@ def test_bigquery_magic_with_improperly_formatted_params(): ip.run_cell_magic("bigquery", "--params {17}", sql) +@pytest.mark.parametrize( + "raw_sql", ("SELECT answer AS 42", " \t SELECT answer AS 42 \t ") +) +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + ipython_ns_cleanup.append((ip, "query_results_df")) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + mock_result = pandas.DataFrame([42], columns=["answer"]) + query_job_mock.to_dataframe.return_value = mock_result + + ip.user_ns["custom_query"] = raw_sql + cell_body = "$custom_query" + assert "query_results_df" not in ip.user_ns + + with run_query_patch as run_query_mock: + run_query_mock.return_value = query_job_mock + + ip.run_cell_magic("bigquery", "query_results_df", cell_body) + + run_query_mock.assert_called_once_with(mock.ANY, raw_sql, mock.ANY) + + assert "query_results_df" in ip.user_ns # verify that the variable exists + df = ip.user_ns["query_results_df"] + assert len(df) == len(mock_result) # verify row count + assert list(df) == list(mock_result) # verify column names + assert list(df["answer"]) == [42] + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_nonexisting_query_variable(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ip.user_ns.pop("custom_query", None) # Make sure the variable does NOT exist. + cell_body = "$custom_query" + + with pytest.raises( + NameError, match=r".*custom_query does not exist.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_empty_query_variable_name(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + cell_body = "$" + + with io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "", cell_body) + + output = captured_io.stderr + assert "ERROR:" in output + assert "must be a fully-qualified ID" in output + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + ipython_ns_cleanup.append((ip, "custom_query")) + + ip.user_ns["custom_query"] = object() + cell_body = "$custom_query" + + with pytest.raises( + TypeError, match=r".*must be string or bytes-like.*" + ), run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "", cell_body) + + run_query_mock.assert_not_called() + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_invalid_multiple_option_values(): From e5610d6135e3ebad9e0217f9cdf2aee78541e0e9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 8 Nov 2021 21:07:38 +0100 Subject: [PATCH 2/6] Fix missing indefinitive article in error msg --- google/cloud/bigquery/magics/magics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 67dfa6044..b0a14919e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -611,8 +611,8 @@ def _cell_magic(line, query): else: if not isinstance(query, (str, bytes)): raise TypeError( - f"Query variable {query_var_name} must be string " - "or bytes-like value." + f"Query variable {query_var_name} must be a string " + "or a bytes-like value." ) # Any query that does not contain whitespace (aside from leading and trailing whitespace) From e9fd3668c25b3093d40913051a189c4f22feb56e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 8 Nov 2021 22:25:16 +0200 Subject: [PATCH 3/6] Adjust test assertion to error message change --- tests/unit/test_magics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 7d73732e7..cbfdced3f 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -1814,7 +1814,7 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): cell_body = "$custom_query" with pytest.raises( - TypeError, match=r".*must be string or bytes-like.*" + TypeError, match=r".*must be a string or a bytes-like.*" ), run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "", cell_body) From e26e0a88716ff78b6ae3f702bdf217a811cf9e6e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 10 Nov 2021 22:09:59 +0200 Subject: [PATCH 4/6] Refactor logic for extracting query variable --- google/cloud/bigquery/magics/magics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index fdddddc33..98cf14f2c 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -602,12 +602,12 @@ def _cell_magic(line, query): if query_var_name.isidentifier(): ip = IPython.get_ipython() - try: - query = ip.user_ns[query_var_name] - except KeyError: + query = ip.user_ns.get(query_var_name, ip) # ip serves as a sentinel + + if query is ip: raise NameError( f"Unknown query, variable {query_var_name} does not exist." - ) from None + ) else: if not isinstance(query, (str, bytes)): raise TypeError( From 88e1638e459668c1300bb179d74dd8d4c2da154b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 11 Nov 2021 10:57:08 +0200 Subject: [PATCH 5/6] Explicitly warn about missing query variable name --- google/cloud/bigquery/magics/magics.py | 4 ++++ tests/unit/test_magics.py | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 98cf14f2c..5af0a3b51 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -600,6 +600,10 @@ def _cell_magic(line, query): if query.startswith("$"): query_var_name = query[1:] + if not query_var_name: + missing_msg = 'Missing query variable name, empty "$" is not allowed.' + raise NameError(missing_msg) + if query_var_name.isidentifier(): ip = IPython.get_ipython() query = ip.user_ns.get(query_var_name, ip) # ip serves as a sentinel diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index cbfdced3f..16c121d25 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -1735,7 +1735,7 @@ def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw query_job_mock.to_dataframe.return_value = mock_result ip.user_ns["custom_query"] = raw_sql - cell_body = "$custom_query" + cell_body = "$custom_query" # Referring to an existing variable name (custom_query) assert "query_results_df" not in ip.user_ns with run_query_patch as run_query_mock: @@ -1766,7 +1766,7 @@ def test_bigquery_magic_nonexisting_query_variable(): ) ip.user_ns.pop("custom_query", None) # Make sure the variable does NOT exist. - cell_body = "$custom_query" + cell_body = "$custom_query" # Referring to a non-existing variable name. with pytest.raises( NameError, match=r".*custom_query does not exist.*" @@ -1785,14 +1785,17 @@ def test_bigquery_magic_empty_query_variable_name(): google.auth.credentials.Credentials, instance=True ) - cell_body = "$" + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + cell_body = "$" # Not referring to any variable (name omitted). - with io.capture_output() as captured_io: + with pytest.raises( + NameError, match=r"(?i).*missing query variable name.*" + ), run_query_patch as run_query_mock: ip.run_cell_magic("bigquery", "", cell_body) - output = captured_io.stderr - assert "ERROR:" in output - assert "must be a fully-qualified ID" in output + run_query_mock.assert_not_called() @pytest.mark.usefixtures("ipython_interactive") @@ -1811,7 +1814,7 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): ipython_ns_cleanup.append((ip, "custom_query")) ip.user_ns["custom_query"] = object() - cell_body = "$custom_query" + cell_body = "$custom_query" # Referring to a non-string variable. with pytest.raises( TypeError, match=r".*must be a string or a bytes-like.*" From b6d43c11656efe749f1cc59a54ce604944753871 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 11 Nov 2021 12:16:22 +0200 Subject: [PATCH 6/6] Thest the query "variable" is not identifier case --- tests/unit/test_magics.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 16c121d25..e18d04d64 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -1824,6 +1824,29 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup): run_query_mock.assert_not_called() +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_query_variable_not_identifier(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + cell_body = "$123foo" # 123foo is not valid Python identifier + + with io.capture_output() as captured_io: + ip.run_cell_magic("bigquery", "", cell_body) + + # If "$" prefixes a string that is not a Python identifier, we do not treat such + # cell_body as a variable reference and just treat is as any other cell body input. + # If at the same time the cell body does not contain any whitespace, it is + # considered a table name, thus we expect an error that the table ID is not valid. + output = captured_io.stderr + assert "ERROR:" in output + assert "must be a fully-qualified ID" in output + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_with_invalid_multiple_option_values():