From d44d7004f9a60e3ab4bbae8e2a5e6986915638bd Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Wed, 30 Jun 2021 16:13:50 -0500
Subject: [PATCH 1/6] docs: pandas DataFrame samples are more standalone

---
 samples/conftest.py                            | 23 +++++++
 samples/quickstart/quickstart_test.py          |  5 --
 samples/to_dataframe/main_test.py              |  9 ++-
 samples/to_dataframe/noxfile.py                |  2 +-
 samples/to_dataframe/read_query_results.py     | 47 +++++++++++++
 .../to_dataframe/read_query_results_test.py    | 21 ++++++
 samples/to_dataframe/read_table_bigquery.py    | 42 ++++++++++++
 .../to_dataframe/read_table_bigquery_test.py   | 21 ++++++
 samples/to_dataframe/read_table_bqstorage.py   | 68 +++++++++++++++++++
 .../to_dataframe/read_table_bqstorage_test.py  | 21 ++++++
 samples/to_dataframe/requirements.txt          |  8 +--
 11 files changed, 255 insertions(+), 12 deletions(-)
 create mode 100644 samples/conftest.py
 create mode 100644 samples/to_dataframe/read_query_results.py
 create mode 100644 samples/to_dataframe/read_query_results_test.py
 create mode 100644 samples/to_dataframe/read_table_bigquery.py
 create mode 100644 samples/to_dataframe/read_table_bigquery_test.py
 create mode 100644 samples/to_dataframe/read_table_bqstorage.py
 create mode 100644 samples/to_dataframe/read_table_bqstorage_test.py

diff --git a/samples/conftest.py b/samples/conftest.py
new file mode 100644
index 00000000..25b5afae
--- /dev/null
+++ b/samples/conftest.py
@@ -0,0 +1,23 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def project_id():
+    return os.environ["GOOGLE_CLOUD_PROJECT"]
+
diff --git a/samples/quickstart/quickstart_test.py b/samples/quickstart/quickstart_test.py
index 23f3c350..0ae6848b 100644
--- a/samples/quickstart/quickstart_test.py
+++ b/samples/quickstart/quickstart_test.py
@@ -27,11 +27,6 @@ def now_millis():
     )
 
 
-@pytest.fixture()
-def project_id():
-    return os.environ["GOOGLE_CLOUD_PROJECT"]
-
-
 def test_quickstart_wo_snapshot(capsys, project_id):
     quickstart.main(project_id)
     out, _ = capsys.readouterr()
diff --git a/samples/to_dataframe/main_test.py b/samples/to_dataframe/main_test.py
index bda6d601..6e817f09 100644
--- a/samples/to_dataframe/main_test.py
+++ b/samples/to_dataframe/main_test.py
@@ -31,7 +31,10 @@ def clients():
     )
 
     # Make clients.
-    bqclient = bigquery.Client(credentials=credentials, project=your_project_id,)
+    bqclient = bigquery.Client(
+        credentials=credentials,
+        project=your_project_id,
+    )
     bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)
     # [END bigquerystorage_pandas_tutorial_create_client]
     # [END bigquerystorage_pandas_tutorial_all]
@@ -124,7 +127,9 @@ def test_session_to_dataframe(capsys, clients):
         read_options=read_options,
     )
     read_session = bqstorageclient.create_read_session(
-        parent=parent, read_session=requested_session, max_stream_count=1,
+        parent=parent,
+        read_session=requested_session,
+        max_stream_count=1,
     )
 
     # This example reads from only a single stream. Read from multiple streams
diff --git a/samples/to_dataframe/noxfile.py b/samples/to_dataframe/noxfile.py
index 160fe728..b3c8658a 100644
--- a/samples/to_dataframe/noxfile.py
+++ b/samples/to_dataframe/noxfile.py
@@ -226,7 +226,7 @@ def py(session: nox.sessions.Session) -> None:
 
 
 def _get_repo_root() -> Optional[str]:
-    """ Returns the root folder of the project. """
+    """Returns the root folder of the project."""
     # Get root of this repository. Assume we don't have directories nested deeper than 10 items.
     p = Path(os.getcwd())
     for i in range(10):
diff --git a/samples/to_dataframe/read_query_results.py b/samples/to_dataframe/read_query_results.py
new file mode 100644
index 00000000..6551eff5
--- /dev/null
+++ b/samples/to_dataframe/read_query_results.py
@@ -0,0 +1,47 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def read_query_results():
+    # [START bigquerystorage_pandas_tutorial_read_query_results]
+    from google.cloud import bigquery
+
+    bqclient = bigquery.Client()
+
+    # Download query results.
+    query_string = """
+    SELECT
+    CONCAT(
+        'https://stackoverflow.com/questions/',
+        CAST(id as STRING)) as url,
+    view_count
+    FROM `bigquery-public-data.stackoverflow.posts_questions`
+    WHERE tags like '%google-bigquery%'
+    ORDER BY view_count DESC
+    """
+
+    dataframe = (
+        bqclient.query(query_string)
+        .result()
+        .to_dataframe(
+            # Optionally, explicitly request to use the BigQuery Storage API. As of
+            # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
+            # API is used by default.
+            create_bqstorage_client=True,
+        )
+    )
+    print(dataframe.head())
+    # [END bigquerystorage_pandas_tutorial_read_query_results
+
+    return dataframe
diff --git a/samples/to_dataframe/read_query_results_test.py b/samples/to_dataframe/read_query_results_test.py
new file mode 100644
index 00000000..55b55a08
--- /dev/null
+++ b/samples/to_dataframe/read_query_results_test.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import read_query_results
+
+
+def test_read_query_results(capsys):
+    read_query_results.read_query_results()
+    out, _ = capsys.readouterr()
+    assert "stackoverflow" in out
diff --git a/samples/to_dataframe/read_table_bigquery.py b/samples/to_dataframe/read_table_bigquery.py
new file mode 100644
index 00000000..82d8879b
--- /dev/null
+++ b/samples/to_dataframe/read_table_bigquery.py
@@ -0,0 +1,42 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def read_table():
+    # [START bigquerystorage_pandas_tutorial_read_table]
+    from google.cloud import bigquery
+
+    bqclient = bigquery.Client()
+
+    # Download a table.
+    table = bigquery.TableReference.from_string(
+        "bigquery-public-data.utility_us.country_code_iso"
+    )
+    rows = bqclient.list_rows(
+        table,
+        selected_fields=[
+            bigquery.SchemaField("country_name", "STRING"),
+            bigquery.SchemaField("fips_code", "STRING"),
+        ],
+    )
+    dataframe = rows.to_dataframe(
+        # Optionally, explicitly request to use the BigQuery Storage API. As of
+        # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
+        # API is used by default.
+        create_bqstorage_client=True,
+    )
+    print(dataframe.head())
+    # [END bigquerystorage_pandas_tutorial_read_table]
+
+    return dataframe
diff --git a/samples/to_dataframe/read_table_bigquery_test.py b/samples/to_dataframe/read_table_bigquery_test.py
new file mode 100644
index 00000000..c8301857
--- /dev/null
+++ b/samples/to_dataframe/read_table_bigquery_test.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import read_table_bigquery
+
+
+def test_read_table(capsys):
+    read_table_bigquery.read_table()
+    out, _ = capsys.readouterr()
+    assert "country_name" in out
diff --git a/samples/to_dataframe/read_table_bqstorage.py b/samples/to_dataframe/read_table_bqstorage.py
new file mode 100644
index 00000000..be3aac9e
--- /dev/null
+++ b/samples/to_dataframe/read_table_bqstorage.py
@@ -0,0 +1,68 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def read_table(your_project_id):
+    original_your_project_id = your_project_id
+    # [START bigquerystorage_pandas_tutorial_read_session]
+    your_project_id = "project-for-read-session"
+    # [END bigquerystorage_pandas_tutorial_read_session]
+    your_project_id = original_your_project_id
+
+    # [START bigquerystorage_pandas_tutorial_read_session]
+    from google.cloud import bigquery_storage
+    from google.cloud.bigquery_storage import types
+
+    bqstorageclient = bigquery_storage.BigQueryReadClient()
+
+    project_id = "bigquery-public-data"
+    dataset_id = "new_york_trees"
+    table_id = "tree_species"
+    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"
+
+    # Select columns to read with read options. If no read options are
+    # specified, the whole table is read.
+    read_options = types.ReadSession.TableReadOptions(
+        selected_fields=["species_common_name", "fall_color"]
+    )
+
+    parent = "projects/{}".format(your_project_id)
+
+    requested_session = types.ReadSession(
+        table=table,
+        # Avro is also supported, but the Arrow data format is optimized to
+        # work well with column-oriented data structures such as pandas
+        # DataFrames.
+        data_format=types.DataFormat.ARROW,
+        read_options=read_options,
+    )
+    read_session = bqstorageclient.create_read_session(
+        parent=parent,
+        read_session=requested_session,
+        max_stream_count=1,
+    )
+
+    # This example reads from only a single stream. Read from multiple streams
+    # to fetch data faster. Note that the session may not contain any streams
+    # if there are no rows to read.
+    stream = read_session.streams[0]
+    reader = bqstorageclient.read_rows(stream.name)
+
+    # Parse all Arrow blocks and create a dataframe. This call requires a
+    # session, because the session contains the schema for the row blocks.
+    dataframe = reader.to_dataframe(read_session)
+    print(dataframe.head())
+    # [END bigquerystorage_pandas_tutorial_read_session]
+
+    return dataframe
diff --git a/samples/to_dataframe/read_table_bqstorage_test.py b/samples/to_dataframe/read_table_bqstorage_test.py
new file mode 100644
index 00000000..cc093078
--- /dev/null
+++ b/samples/to_dataframe/read_table_bqstorage_test.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from . import read_table_bqstorage
+
+
+def test_read_table(capsys, project_id):
+    read_table_bqstorage.read_table(your_project_id=project_id)
+    out, _ = capsys.readouterr()
+    assert "species_common_name" in out
diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt
index 455e6894..2f8dc500 100644
--- a/samples/to_dataframe/requirements.txt
+++ b/samples/to_dataframe/requirements.txt
@@ -2,7 +2,7 @@ google-auth==1.32.0
 google-cloud-bigquery-storage==2.4.0
 google-cloud-bigquery==2.20.0
 pyarrow==4.0.1
-ipython==7.10.2; python_version > '3.0'
-ipython==5.9.0; python_version < '3.0'
-pandas==0.25.3; python_version > '3.0'
-pandas==0.24.2; python_version < '3.0'
+ipython==7.24.0; python_version > '3.6'
+ipython==7.16.1; python_version <= '3.6'
+pandas==1.2.5; python_version > '3.6'
+pandas==1.1.5; python_version <= '3.6'

From 94df06394019a86da87a5514de9b9cbaf0fbe703 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Wed, 30 Jun 2021 16:18:30 -0500
Subject: [PATCH 2/6] fix region tag

---
 samples/to_dataframe/read_query_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/to_dataframe/read_query_results.py b/samples/to_dataframe/read_query_results.py
index 6551eff5..1660ee7b 100644
--- a/samples/to_dataframe/read_query_results.py
+++ b/samples/to_dataframe/read_query_results.py
@@ -42,6 +42,6 @@ def read_query_results():
         )
     )
     print(dataframe.head())
-    # [END bigquerystorage_pandas_tutorial_read_query_results
+    # [END bigquerystorage_pandas_tutorial_read_query_result]
 
     return dataframe

From d4221828874811cc8e211da1329ad8b2e85b89e0 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Wed, 30 Jun 2021 16:19:19 -0500
Subject: [PATCH 3/6] fix region tag

---
 samples/to_dataframe/read_query_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/to_dataframe/read_query_results.py b/samples/to_dataframe/read_query_results.py
index 1660ee7b..45bae1ea 100644
--- a/samples/to_dataframe/read_query_results.py
+++ b/samples/to_dataframe/read_query_results.py
@@ -42,6 +42,6 @@ def read_query_results():
         )
     )
     print(dataframe.head())
-    # [END bigquerystorage_pandas_tutorial_read_query_result]
+    # [END bigquerystorage_pandas_tutorial_read_query_results]
 
     return dataframe

From 87ff7fac7d10062cd865828644b85ea0fe7ed047 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Wed, 30 Jun 2021 16:38:40 -0500
Subject: [PATCH 4/6] remove unused imports

---
 samples/quickstart/quickstart_test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/samples/quickstart/quickstart_test.py b/samples/quickstart/quickstart_test.py
index 0ae6848b..8e1e0dfd 100644
--- a/samples/quickstart/quickstart_test.py
+++ b/samples/quickstart/quickstart_test.py
@@ -13,9 +13,6 @@
 # limitations under the License.
 
 import datetime
-import os
-
-import pytest
 
 from . import quickstart
 
From fdd8b6b3932e313ef7c559cf85285dba8a5e47d9 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Thu, 1 Jul 2021 09:53:10 -0500
Subject: [PATCH 5/6] blacken

---
 samples/conftest.py                          | 1 -
 samples/to_dataframe/main_test.py            | 9 ++-------
 samples/to_dataframe/read_table_bqstorage.py | 4 +---
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/samples/conftest.py b/samples/conftest.py
index 25b5afae..92068ef5 100644
--- a/samples/conftest.py
+++ b/samples/conftest.py
@@ -20,4 +20,3 @@
 @pytest.fixture(scope="session")
 def project_id():
     return os.environ["GOOGLE_CLOUD_PROJECT"]
-
diff --git a/samples/to_dataframe/main_test.py b/samples/to_dataframe/main_test.py
index 6e817f09..bda6d601 100644
--- a/samples/to_dataframe/main_test.py
+++ b/samples/to_dataframe/main_test.py
@@ -31,10 +31,7 @@ def clients():
     )
 
     # Make clients.
-    bqclient = bigquery.Client(
-        credentials=credentials,
-        project=your_project_id,
-    )
+    bqclient = bigquery.Client(credentials=credentials, project=your_project_id,)
     bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)
     # [END bigquerystorage_pandas_tutorial_create_client]
     # [END bigquerystorage_pandas_tutorial_all]
@@ -127,9 +124,7 @@ def test_session_to_dataframe(capsys, clients):
         read_options=read_options,
     )
     read_session = bqstorageclient.create_read_session(
-        parent=parent,
-        read_session=requested_session,
-        max_stream_count=1,
+        parent=parent, read_session=requested_session, max_stream_count=1,
     )
 
     # This example reads from only a single stream. Read from multiple streams
diff --git a/samples/to_dataframe/read_table_bqstorage.py b/samples/to_dataframe/read_table_bqstorage.py
index be3aac9e..63914ea0 100644
--- a/samples/to_dataframe/read_table_bqstorage.py
+++ b/samples/to_dataframe/read_table_bqstorage.py
@@ -48,9 +48,7 @@ def read_table(your_project_id):
         read_options=read_options,
     )
     read_session = bqstorageclient.create_read_session(
-        parent=parent,
-        read_session=requested_session,
-        max_stream_count=1,
+        parent=parent, read_session=requested_session, max_stream_count=1,
     )
 
     # This example reads from only a single stream. Read from multiple streams

From 0b7fc64f1a4136db50fc078af0826fa053de767c Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 9 Jul 2021 16:37:08 -0500
Subject: [PATCH 6/6] remove session from call to rows/to_dataframe

---
 samples/to_dataframe/read_table_bqstorage.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/samples/to_dataframe/read_table_bqstorage.py b/samples/to_dataframe/read_table_bqstorage.py
index 63914ea0..0a3ae777 100644
--- a/samples/to_dataframe/read_table_bqstorage.py
+++ b/samples/to_dataframe/read_table_bqstorage.py
@@ -23,6 +23,7 @@ def read_table(your_project_id):
     # [START bigquerystorage_pandas_tutorial_read_session]
     from google.cloud import bigquery_storage
     from google.cloud.bigquery_storage import types
+    import pandas
 
     bqstorageclient = bigquery_storage.BigQueryReadClient()
 
@@ -57,9 +58,11 @@ def read_table(your_project_id):
     stream = read_session.streams[0]
     reader = bqstorageclient.read_rows(stream.name)
 
-    # Parse all Arrow blocks and create a dataframe. This call requires a
-    # session, because the session contains the schema for the row blocks.
-    dataframe = reader.to_dataframe(read_session)
+    # Parse all Arrow blocks and create a dataframe.
+    frames = []
+    for message in reader.rows().pages:
+        frames.append(message.to_dataframe())
+    dataframe = pandas.concat(frames)
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_session]
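
A possible follow-up, not committed in this series: the finished read_table_bqstorage.py still reads a single stream, and its comment suggests reading from multiple streams to fetch data faster. The sketch below shows one way that could look. It reuses only calls that already appear in the patches (create_read_session, read_rows, rows().pages, to_dataframe, pandas.concat); the helper name read_streams_concurrently, the max_streams parameter, and the thread-pool approach are illustrative assumptions, not part of the committed sample.

# Sketch only -- not part of patches 1-6. Assumes the same public table and
# google-cloud-bigquery-storage 2.x API surface used in the samples above.
from concurrent import futures

import pandas
from google.cloud import bigquery_storage
from google.cloud.bigquery_storage import types


def read_streams_concurrently(your_project_id, max_streams=4):
    bqstorageclient = bigquery_storage.BigQueryReadClient()
    table = "projects/bigquery-public-data/datasets/new_york_trees/tables/tree_species"

    requested_session = types.ReadSession(
        table=table,
        data_format=types.DataFormat.ARROW,
        read_options=types.ReadSession.TableReadOptions(
            selected_fields=["species_common_name", "fall_color"]
        ),
    )
    read_session = bqstorageclient.create_read_session(
        parent="projects/{}".format(your_project_id),
        read_session=requested_session,
        max_stream_count=max_streams,
    )

    # The service may return fewer streams than requested, or none at all
    # when the table has no rows.
    if not read_session.streams:
        return pandas.DataFrame()

    def download_stream(stream):
        # Each stream is read independently; every page converts to a DataFrame.
        reader = bqstorageclient.read_rows(stream.name)
        page_frames = [page.to_dataframe() for page in reader.rows().pages]
        if not page_frames:
            return pandas.DataFrame()
        return pandas.concat(page_frames, ignore_index=True)

    # Download the streams in parallel threads and stitch the results together.
    with futures.ThreadPoolExecutor(max_workers=len(read_session.streams)) as pool:
        frames = list(pool.map(download_stream, read_session.streams))
    return pandas.concat(frames, ignore_index=True)

Called as read_streams_concurrently(project_id), this would return one DataFrame with the same columns as the single-stream sample, though row order is not guaranteed across streams.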