From 0e3dba27f3b6d4cd4c1cba751901866bb9884248 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 2 Aug 2021 16:44:04 -0400 Subject: [PATCH 01/40] Removed unused method --- tests/system/test_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..abe3e335d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2330,9 +2330,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" From ff6c4ff6588f9e279c878a63be64ccf0b7d97722 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 10:55:23 -0400 Subject: [PATCH 02/40] Implemented geography_as_object argument to to_dataframe and implemented to_geodataframe --- google/cloud/bigquery/job/query.py | 88 ++++++++++++++ google/cloud/bigquery/table.py | 178 ++++++++++++++++++++++++++++ setup.py | 1 + tests/system/test_pandas.py | 53 +++++++++ tests/unit/job/test_query_pandas.py | 75 ++++++++++-- tests/unit/test_table.py | 122 +++++++++++++++++++ 6 files changed, 505 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..eb7027131 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1423,6 +1423,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1489,6 +1490,93 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, 
date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Return a pandas DataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. 
+ + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY columns, which one to use + to construct a geopandas GeoDataFrame. This option can be ommitted + if there's only one GEOGRAPHY column. + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index daade1ac6..06f08f181 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -30,6 +30,18 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import geopandas +except ImportError: + geopandas = None +else: + CRS = 'EPSG:4326' + +try: + from shapely import wkt +except ImportError: + wkt = None + try: import pyarrow except ImportError: # pragma: NO COVER @@ -61,6 +73,14 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." @@ -1841,6 +1861,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -1896,6 +1917,12 @@ def to_dataframe( .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert geography data to shapely objects. + If ``False`` (default), don't cast geography data to shapely objects. + + .. versionadded:: ??? + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1911,6 +1938,9 @@ def to_dataframe( """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and wkt is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1951,8 +1981,130 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == 'GEOGRAPHY': + df[field.name] = df[field.name].dropna().apply(wkt.loads) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create a pandas DataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. 
If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY columns, which one to use + to construct a geopandas GeoDataFrame. This option can be ommitted + if there's only one GEOGRAPHY column. 
+ + Returns: + geopandas.GeoDataFrame: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + + .. versionadded:: ??? + + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == 'GEOGRAPHY' + ) + if not geography_columns: + raise TypeError("There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame") + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column} doesn't name" + f" a GEOGRAPHY column in the result.") + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame") + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame(df, crs=CRS, geometry=geography_column) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2005,6 +2157,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. 
@@ -2022,6 +2175,31 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create an empty dataframe. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame() + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/setup.py b/setup.py index e9deaf117..d01f3dcf6 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ "pyarrow >= 1.0.0, < 6.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "geopandas": ["geopandas>=0.6.0"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 821b375e1..9bbdac642 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -804,3 +804,56 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, 
geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = (bigquery_client + .query(f"select * from {dataset_id}.lake order by name") + .to_dataframe(geography_as_object=True) + ) + assert str(df) == ( + ' name geog\n' + '0 bar POINT (0 1)\n' + '1 baz NaN\n' + '2 foo POINT (0 0)') + assert [v.__class__.__name__ for v in df['geog']] == ['Point', 'float', 'Point'] + + +def test_to_geodataframe(bigquery_client, dataset_id): + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 1, 1 0, 0 0))')), + ('baz', null) + """ + ).result() + df = (bigquery_client + .query(f"select * from {dataset_id}.lake order by name") + .to_geodataframe() + ) + assert [v.__class__.__name__ for v in df['geog']] == [ + 'Polygon', 'NoneType', 'Point'] + assert df.__class__.__name__ == 'GeoDataFrame' + assert df['geog'].__class__.__name__ == 'GeoSeries' + assert list(map(str, df['geog'])) == [ + 'POLYGON ((1 0, 1 1, 0 0, 1 0))', 'None', 'POINT (0 0)'] + assert str(df.area) == ( + '0 0.5\n' + '1 NaN\n' + '2 0.0\n' + 'dtype: float64') diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..f973fb7ee 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -23,6 +23,14 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -425,29 +433,26 @@ def test_to_arrow_w_tqdm_wo_query_plan(): 
result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] + dict(name=field[0], type=field[1], mode=field[2]) + for field in schema + ] }, } tabledata_resource = { "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] + {"f": [{"v": v} for v in row]} + for row in rows + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} @@ -455,8 +460,18 @@ def test_to_dataframe(): begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + (("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + )) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -868,3 +883,39 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires 
`shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + (("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + )) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + 'Point', 'Point', 'float'] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + (("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + )) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + 'Point', 'Point', 'NoneType'] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4b1fd833b..e09e85774 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -41,6 +41,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: import pyarrow import pyarrow.types @@ -1665,6 +1670,13 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. 
self.assertEqual(len(df.columns), 0) + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1702,6 +1714,18 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + schema = [SchemaField(*a) for a in schema] + rows = [ + {"f": [{"v": v} for v in row]} + for row in rows + ] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -2996,6 +3020,13 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @mock.patch("google.cloud.bigquery.table.wkt", new=None) + def test_to_dataframe_error_if_shapely_wkt_is_none(self): + with self.assertRaises(ValueError): + # No can do if no shapely + self._make_one_from_data().to_dataframe(geography_as_object=True) + + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3753,6 +3784,97 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + (("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + )) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual([v.__class__.__name__ for v in df.geog], + ['Point', 'float', 'Polygon']) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaises(ValueError): + # No can do if no shapely + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + (("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + )) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ['0.0', 'nan', '0.5']) + self.assertEqual(list(map(str, df.geog.area)), ['0.0', 'nan', '0.5']) + + @unittest.skipIf(geopandas is None, "Requires 
`geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ()) + with self.assertRaises(ValueError): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), + ()) + with self.assertRaises(TypeError): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + (("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + )) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column='geog' + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ['0.0', 'nan', '0.5']) + self.assertEqual(list(map(str, df.geog.area)), ['0.0', 'nan', '0.5']) + self.assertEqual([v.__class__.__name__ for v in df.geog], + ['Point', 'NoneType', 'Polygon']) + + # Geog2 isn't a GeoSeries, but it contains geomentries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual([v.__class__.__name__ for v in df.geog2], + ['Point', 'Point', 'Point']) + # and can easily be converted to a GeoSeries + self.assertEqual(list(map(str, geopandas.GeoSeries(df.geog2).area)), + 
['0.0', '0.0', '0.0']) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From ecff6f9c4b30b34ea58e599cff3194d207635d2b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 11:36:49 -0400 Subject: [PATCH 03/40] blacken --- google/cloud/bigquery/table.py | 20 +++++--- tests/system/test_pandas.py | 53 ++++++++++---------- tests/unit/job/test_query_pandas.py | 54 ++++++++++++--------- tests/unit/test_table.py | 75 ++++++++++++++++------------- 4 files changed, 110 insertions(+), 92 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 06f08f181..5f35a1520 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -35,7 +35,7 @@ except ImportError: geopandas = None else: - CRS = 'EPSG:4326' + CRS = "EPSG:4326" try: from shapely import wkt @@ -1983,7 +1983,7 @@ def to_dataframe( if geography_as_object: for field in self.schema: - if field.field_type.upper() == 'GEOGRAPHY': + if field.field_type.upper() == "GEOGRAPHY": df[field.name] = df[field.name].dropna().apply(wkt.loads) return df @@ -2075,24 +2075,28 @@ def to_geodataframe( geography_columns = set( field.name for field in self.schema - if field.field_type.upper() == 'GEOGRAPHY' + if field.field_type.upper() == "GEOGRAPHY" ) if not geography_columns: - raise TypeError("There must be at least one GEOGRAPHY column" - " to create a GeoDataFrame") + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) if geography_column: if geography_column not in geography_columns: raise ValueError( f"The given geography column, {geography_column} doesn't name" - f" a GEOGRAPHY column in the result.") + f" a GEOGRAPHY column in the result." + ) elif len(geography_columns) == 1: [geography_column] = geography_columns else: raise ValueError( "There is more than one GEOGRAPHY column in the result. 
" "The geography_column argument must be used to specify which " - "one to use to create a GeoDataFrame") + "one to use to create a GeoDataFrame" + ) df = self.to_dataframe( bqstorage_client, @@ -2101,7 +2105,7 @@ def to_geodataframe( create_bqstorage_client, date_as_object, geography_as_object=True, - ) + ) return geopandas.GeoDataFrame(df, crs=CRS, geometry=geography_column) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 9bbdac642..3b9c6cf63 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -817,17 +817,17 @@ def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): ('bar', st_geogfromtext('point(0 1)')), ('baz', null) """ - ).result() - df = (bigquery_client - .query(f"select * from {dataset_id}.lake order by name") - .to_dataframe(geography_as_object=True) - ) + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) assert str(df) == ( - ' name geog\n' - '0 bar POINT (0 1)\n' - '1 baz NaN\n' - '2 foo POINT (0 0)') - assert [v.__class__.__name__ for v in df['geog']] == ['Point', 'float', 'Point'] + " name geog\n" + "0 bar POINT (0 1)\n" + "1 baz NaN\n" + "2 foo POINT (0 0)" + ) + assert [v.__class__.__name__ for v in df["geog"]] == ["Point", "float", "Point"] def test_to_geodataframe(bigquery_client, dataset_id): @@ -841,19 +841,20 @@ def test_to_geodataframe(bigquery_client, dataset_id): ('bar', st_geogfromtext('polygon((0 0, 1 1, 1 0, 0 0))')), ('baz', null) """ - ).result() - df = (bigquery_client - .query(f"select * from {dataset_id}.lake order by name") - .to_geodataframe() - ) - assert [v.__class__.__name__ for v in df['geog']] == [ - 'Polygon', 'NoneType', 'Point'] - assert df.__class__.__name__ == 'GeoDataFrame' - assert df['geog'].__class__.__name__ == 'GeoSeries' - assert list(map(str, df['geog'])) == [ - 'POLYGON ((1 0, 1 1, 0 0, 1 0))', 'None', 'POINT (0 0)'] - assert str(df.area) == ( - '0 0.5\n' - 
'1 NaN\n' - '2 0.0\n' - 'dtype: float64') + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_geodataframe() + assert [v.__class__.__name__ for v in df["geog"]] == [ + "Polygon", + "NoneType", + "Point", + ] + assert df.__class__.__name__ == "GeoDataFrame" + assert df["geog"].__class__.__name__ == "GeoSeries" + assert list(map(str, df["geog"])) == [ + "POLYGON ((1 0, 1 1, 0 0, 1 0))", + "None", + "POINT (0 0)", + ] + assert str(df.area) == ("0 0.5\n" "1 NaN\n" "2 0.0\n" "dtype: float64") diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index f973fb7ee..7454feab2 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -443,17 +443,11 @@ def _make_job(schema=(), rows=()): "totalRows": str(len(rows)), "schema": { "fields": [ - dict(name=field[0], type=field[1], mode=field[2]) - for field in schema - ] - }, - } - tabledata_resource = { - "rows": [ - {"f": [{"v": v} for v in row]} - for row in rows + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] + }, } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( @@ -467,11 +461,13 @@ def _make_job(schema=(), rows=()): def test_to_dataframe(): job = _make_job( (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), - (("Phred Phlyntstone", "32"), - ("Bharney Rhubble", "33"), - ("Wylma Phlyntstone", "29"), - ("Bhettye Rhubble", "27"), - )) + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -890,32 +886,42 @@ def test_to_dataframe_w_tqdm_max_results(): def test_to_dataframe_geography_as_object(): job = _make_job( (("name", "STRING", "NULLABLE"), 
("geog", "GEOGRAPHY", "NULLABLE")), - (("Phred Phlyntstone", "Point(0 0)"), - ("Bharney Rhubble", "Point(0 1)"), - ("Wylma Phlyntstone", None), - )) + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) assert isinstance(df, pandas.DataFrame) assert len(df) == 3 # verify the number of rows assert list(df) == ["name", "geog"] # verify the column names assert [v.__class__.__name__ for v in df.geog] == [ - 'Point', 'Point', 'float'] # float because nan + "Point", + "Point", + "float", + ] # float because nan @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") def test_to_geodataframe(): job = _make_job( (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), - (("Phred Phlyntstone", "Point(0 0)"), - ("Bharney Rhubble", "Point(0 1)"), - ("Wylma Phlyntstone", None), - )) + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) df = job.to_geodataframe(create_bqstorage_client=False) assert isinstance(df, geopandas.GeoDataFrame) assert len(df) == 3 # verify the number of rows assert list(df) == ["name", "geog"] # verify the column names assert [v.__class__.__name__ for v in df.geog] == [ - 'Point', 'Point', 'NoneType'] # float because nan + "Point", + "Point", + "NoneType", + ] # float because nan assert isinstance(df.geog, geopandas.GeoSeries) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e09e85774..8bc14a126 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1716,11 +1716,9 @@ def _make_one( def _make_one_from_data(self, schema=(), rows=()): from google.cloud.bigquery.schema import SchemaField + schema = [SchemaField(*a) for a in schema] - rows = [ - {"f": [{"v": v} for v in row]} - for row in rows - ] + rows = [{"f": [{"v": v} for v in row]} for row in rows] path = "/foo" api_request = 
mock.Mock(return_value={"rows": rows}) @@ -3026,7 +3024,6 @@ def test_to_dataframe_error_if_shapely_wkt_is_none(self): # No can do if no shapely self._make_one_from_data().to_dataframe(geography_as_object=True) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3788,10 +3785,12 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): def test_to_dataframe_geography_as_object(self): row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), - (("foo", "Point(0 0)"), - ("bar", None), - ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), - )) + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) df = row_iterator.to_dataframe( create_bqstorage_client=False, geography_as_object=True, ) @@ -3801,8 +3800,9 @@ def test_to_dataframe_geography_as_object(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "object") self.assertIsInstance(df.geog, pandas.Series) - self.assertEqual([v.__class__.__name__ for v in df.geog], - ['Point', 'float', 'Polygon']) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) @mock.patch("google.cloud.bigquery.table.geopandas", new=None) def test_to_geodataframe_error_if_geopandas_is_none(self): @@ -3814,10 +3814,12 @@ def test_to_geodataframe_error_if_geopandas_is_none(self): def test_to_geodataframe(self): row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY")), - (("foo", "Point(0 0)"), - ("bar", None), - ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), - )) + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -3825,22 +3827,22 @@ 
def test_to_geodataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.geog.dtype.name, "geometry") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ['0.0', 'nan', '0.5']) - self.assertEqual(list(map(str, df.geog.area)), ['0.0', 'nan', '0.5']) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_ambiguous_geog(self): row_iterator = self._make_one_from_data( - (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), - ()) + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) with self.assertRaises(ValueError): row_iterator.to_geodataframe(create_bqstorage_client=False) @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_no_geog(self): row_iterator = self._make_one_from_data( - (("name", "STRING"), ("geog", "STRING")), - ()) + (("name", "STRING"), ("geog", "STRING")), () + ) with self.assertRaises(TypeError): row_iterator.to_geodataframe(create_bqstorage_client=False) @@ -3848,12 +3850,14 @@ def test_to_geodataframe_no_geog(self): def test_to_geodataframe_w_geography_column(self): row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), - (("foo", "Point(0 0)", "Point(1 1)"), - ("bar", None, "Point(2 2)"), - ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), - )) + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) df = row_iterator.to_geodataframe( - create_bqstorage_client=False, geography_column='geog' + create_bqstorage_client=False, geography_column="geog" ) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 3) # verify the number of rows @@ -3862,18 +3866,21 @@ def 
test_to_geodataframe_w_geography_column(self): self.assertEqual(df.geog.dtype.name, "geometry") self.assertEqual(df.geog2.dtype.name, "object") self.assertIsInstance(df.geog, geopandas.GeoSeries) - self.assertEqual(list(map(str, df.area)), ['0.0', 'nan', '0.5']) - self.assertEqual(list(map(str, df.geog.area)), ['0.0', 'nan', '0.5']) - self.assertEqual([v.__class__.__name__ for v in df.geog], - ['Point', 'NoneType', 'Polygon']) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) # Geog2 isn't a GeoSeries, but it contains geomentries: self.assertIsInstance(df.geog2, pandas.Series) - self.assertEqual([v.__class__.__name__ for v in df.geog2], - ['Point', 'Point', 'Point']) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) # and can easily be converted to a GeoSeries - self.assertEqual(list(map(str, geopandas.GeoSeries(df.geog2).area)), - ['0.0', '0.0', '0.0']) + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) class TestPartitionRange(unittest.TestCase): From 766be7742f85b92f9139e9dbbd9c99bc7f8c8341 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 11:57:13 -0400 Subject: [PATCH 04/40] Need to use different table names in tests. 
--- tests/system/test_pandas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 3b9c6cf63..029001306 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -832,18 +832,18 @@ def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): def test_to_geodataframe(bigquery_client, dataset_id): bigquery_client.query( - f"create table {dataset_id}.lake (name string, geog geography)" + f"create table {dataset_id}.geolake (name string, geog geography)" ).result() bigquery_client.query( f""" - insert into {dataset_id}.lake (name, geog) values + insert into {dataset_id}.geolake (name, geog) values ('foo', st_geogfromtext('point(0 0)')), ('bar', st_geogfromtext('polygon((0 0, 1 1, 1 0, 0 0))')), ('baz', null) """ ).result() df = bigquery_client.query( - f"select * from {dataset_id}.lake order by name" + f"select * from {dataset_id}.geolake order by name" ).to_geodataframe() assert [v.__class__.__name__ for v in df["geog"]] == [ "Polygon", From a9045042da362c1def181328e90ea97f3d3aad0f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 13:40:42 -0400 Subject: [PATCH 05/40] check messages in error assertions (for geo) --- tests/unit/test_table.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 8bc14a126..9f88bedb9 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -3020,8 +3021,13 @@ def test_to_dataframe_error_if_pandas_is_none(self): @mock.patch("google.cloud.bigquery.table.wkt", new=None) def test_to_dataframe_error_if_shapely_wkt_is_none(self): - with self.assertRaises(ValueError): - # No can do if no shapely + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please 
install " + "shapely to use the geography_as_object option." + ), + ): self._make_one_from_data().to_dataframe(geography_as_object=True) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -3806,8 +3812,12 @@ def test_to_dataframe_geography_as_object(self): @mock.patch("google.cloud.bigquery.table.geopandas", new=None) def test_to_geodataframe_error_if_geopandas_is_none(self): - with self.assertRaises(ValueError): - # No can do if no shapely + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function."), + ): self._make_one_from_data().to_geodataframe() @unittest.skipIf(geopandas is None, "Requires `geopandas`") @@ -3835,7 +3845,13 @@ def test_to_geodataframe_ambiguous_geog(self): row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () ) - with self.assertRaises(ValueError): + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. 
" + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame"), + ): row_iterator.to_geodataframe(create_bqstorage_client=False) @unittest.skipIf(geopandas is None, "Requires `geopandas`") @@ -3843,7 +3859,11 @@ def test_to_geodataframe_no_geog(self): row_iterator = self._make_one_from_data( (("name", "STRING"), ("geog", "STRING")), () ) - with self.assertRaises(TypeError): + with self.assertRaisesRegex( + TypeError, + re.escape("There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame"), + ): row_iterator.to_geodataframe(create_bqstorage_client=False) @unittest.skipIf(geopandas is None, "Requires `geopandas`") From bc985f95ceffef6f2e1b5e0ae8940fe614f5c264 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 13:49:48 -0400 Subject: [PATCH 06/40] added missing comma --- google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 5f35a1520..490613b6d 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2086,7 +2086,7 @@ def to_geodataframe( if geography_column: if geography_column not in geography_columns: raise ValueError( - f"The given geography column, {geography_column} doesn't name" + f"The given geography column, {geography_column}, doesn't name" f" a GEOGRAPHY column in the result." ) elif len(geography_columns) == 1: From fb6eafdb0ddda51c9570374fa14fa382ce7015d5 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 13:50:07 -0400 Subject: [PATCH 07/40] Added missing tests. --- tests/unit/test_table.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 9f88bedb9..93a9aee4c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1671,6 +1671,18 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. 
self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe(self): row_iterator = self._make_one() @@ -3854,6 +3866,20 @@ def test_to_geodataframe_ambiguous_geog(self): ): row_iterator.to_geodataframe(create_bqstorage_client=False) + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result."), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False, + geography_column="xxx") + @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_no_geog(self): row_iterator = self._make_one_from_data( From 0547801dad52504085bb365646305ec2dcd0cfff Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 13:55:57 -0400 Subject: [PATCH 08/40] blacken --- tests/unit/test_table.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 93a9aee4c..ea61d3868 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1679,7 +1679,7 @@ def test_to_geodataframe_if_geopandas_is_none(self): re.escape( "The geopandas library is not installed, please install " "geopandas to use the to_geodataframe() function." 
- ), + ), ): row_iterator.to_geodataframe(create_bqstorage_client=False) @@ -3038,7 +3038,7 @@ def test_to_dataframe_error_if_shapely_wkt_is_none(self): re.escape( "The shapely library is not installed, please install " "shapely to use the geography_as_object option." - ), + ), ): self._make_one_from_data().to_dataframe(geography_as_object=True) @@ -3828,7 +3828,8 @@ def test_to_geodataframe_error_if_geopandas_is_none(self): ValueError, re.escape( "The geopandas library is not installed, please install " - "geopandas to use the to_geodataframe() function."), + "geopandas to use the to_geodataframe() function." + ), ): self._make_one_from_data().to_geodataframe() @@ -3862,7 +3863,8 @@ def test_to_geodataframe_ambiguous_geog(self): re.escape( "There is more than one GEOGRAPHY column in the result. " "The geography_column argument must be used to specify which " - "one to use to create a GeoDataFrame"), + "one to use to create a GeoDataFrame" + ), ): row_iterator.to_geodataframe(create_bqstorage_client=False) @@ -3875,10 +3877,12 @@ def test_to_geodataframe_bad_geography_column(self): ValueError, re.escape( "The given geography column, xxx, doesn't name" - " a GEOGRAPHY column in the result."), + " a GEOGRAPHY column in the result." 
+ ), ): - row_iterator.to_geodataframe(create_bqstorage_client=False, - geography_column="xxx") + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_no_geog(self): @@ -3887,8 +3891,10 @@ def test_to_geodataframe_no_geog(self): ) with self.assertRaisesRegex( TypeError, - re.escape("There must be at least one GEOGRAPHY column" - " to create a GeoDataFrame"), + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), ): row_iterator.to_geodataframe(create_bqstorage_client=False) From e0dde7c85c4b8ff43ed26ce687c76eaaa052ae10 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 15:10:19 -0400 Subject: [PATCH 09/40] Added tests showing delegation in some to_geodataframe implementations --- tests/unit/job/test_query_pandas.py | 45 +++++++++++++++++++++++ tests/unit/test_table.py | 56 +++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 7454feab2..0bb5b8122 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -925,3 +925,48 @@ def test_to_geodataframe(): "NoneType", ] # float because nan assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. 
+ """ + import numpy + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype('int64')) + progress_bar_type = 'normal' + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = 'g' + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value + diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ea61d3868..a045fd920 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3934,6 +3934,62 @@ def test_to_geodataframe_w_geography_column(self): list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] ) + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe. 
+ """ + import numpy + import shapely + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype('int64')) + progress_bar_type = 'normal' + create_bqstorage_client = False + date_as_object = False + geography_column = 'g' + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], + g=[shapely.wkt.loads('point(0 0)')], + )) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual( + [v.__class__.__name__ for v in df.g], ["Point"] + ) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From 177b099b3650f977a1bda011b2c56181055c99e0 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 15:19:58 -0400 Subject: [PATCH 10/40] Added a missing skip --- tests/unit/test_table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a045fd920..f4e38da14 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3031,6 +3031,7 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + 
@unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.wkt", new=None) def test_to_dataframe_error_if_shapely_wkt_is_none(self): with self.assertRaisesRegex( From 86820e29ff79bd7fa18e871fb18d09c26ddd2f12 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 3 Aug 2021 16:17:18 -0400 Subject: [PATCH 11/40] blacken --- tests/unit/job/test_query_pandas.py | 12 ++++++------ tests/unit/test_table.py | 20 +++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 0bb5b8122..b5af90c0b 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -937,14 +937,15 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): variations, which are tested for RowIterator. """ import numpy + job = _make_job() bqstorage_client = object() - dtypes = dict(xxx=numpy.dtype('int64')) - progress_bar_type = 'normal' + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" create_bqstorage_client = False date_as_object = False max_results = 42 - geography_column = 'g' + geography_column = "g" df = job.to_geodataframe( bqstorage_client=bqstorage_client, @@ -954,7 +955,7 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): date_as_object=date_as_object, max_results=max_results, geography_column=geography_column, - ) + ) wait_for_query.assert_called_once_with( job, progress_bar_type, max_results=max_results @@ -967,6 +968,5 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, geography_column=geography_column, - ) + ) assert df is row_iterator.to_geodataframe.return_value - diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f4e38da14..7c0057404 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3951,16 +3951,15 @@ def 
test_rowiterator_to_geodataframe_delegation(self, to_dataframe): (("name", "STRING"), ("g", "GEOGRAPHY")) ) bqstorage_client = object() - dtypes = dict(xxx=numpy.dtype('int64')) - progress_bar_type = 'normal' + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" create_bqstorage_client = False date_as_object = False - geography_column = 'g' + geography_column = "g" to_dataframe.return_value = pandas.DataFrame( - dict(name=["foo"], - g=[shapely.wkt.loads('point(0 0)')], - )) + dict(name=["foo"], g=[shapely.wkt.loads("point(0 0)")],) + ) df = row_iterator.to_geodataframe( bqstorage_client=bqstorage_client, @@ -3969,7 +3968,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, geography_column=geography_column, - ) + ) to_dataframe.assert_called_once_with( bqstorage_client, @@ -3977,7 +3976,8 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): progress_bar_type, create_bqstorage_client, date_as_object, - geography_as_object=True) + geography_as_object=True, + ) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 1) # verify the number of rows @@ -3987,9 +3987,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): self.assertIsInstance(df.g, geopandas.GeoSeries) self.assertEqual(list(map(str, df.area)), ["0.0"]) self.assertEqual(list(map(str, df.g.area)), ["0.0"]) - self.assertEqual( - [v.__class__.__name__ for v in df.g], ["Point"] - ) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) class TestPartitionRange(unittest.TestCase): From c298fbdd4b59748cafe7396d0561db833c68fa2a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 09:08:25 -0400 Subject: [PATCH 12/40] doc tweaks --- google/cloud/bigquery/job/query.py | 48 ++++++++++++++++++++++-------- google/cloud/bigquery/table.py | 43 +++++++++++++------------- 2 files changed, 58 insertions(+), 33 deletions(-) 
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index eb7027131..c3c280462 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1475,13 +1475,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + `pandas.DataFrame`: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1505,8 +1519,8 @@ def to_geodataframe( date_as_object: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, - ) -> "pandas.DataFrame": - """Return a pandas DataFrame from a QueryJob + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob Args: bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): @@ -1557,17 +1571,25 @@ def to_geodataframe( .. versionadded:: 2.21.0 geography_column (Optional[str]): - If there are more than one GEOGRAPHY columns, which one to use - to construct a geopandas GeoDataFrame. This option can be ommitted - if there's only one GEOGRAPHY column. 
+ If there are more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + `geopandas.GeoDataFrame`: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_geodataframe( diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 490613b6d..b626defb1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1918,22 +1918,25 @@ def to_dataframe( .. versionadded:: 1.26.0 geography_as_object (Optional[bool]): - If ``True``, convert geography data to shapely objects. - If ``False`` (default), don't cast geography data to shapely objects. + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. - .. versionadded:: ??? + .. versionadded:: 2.24.0 Returns: - pandas.DataFrame: + `pandas.DataFrame`: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived from the destination table's schema. Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. 
+ If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: @@ -1998,8 +2001,8 @@ def to_geodataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, geography_column: Optional[str] = None, - ) -> "pandas.DataFrame": - """Create a pandas DataFrame by loading all pages of a query. + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. Args: bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): @@ -2049,25 +2052,25 @@ def to_geodataframe( to datetime64[ns] dtype. geography_column (Optional[str]): - If there are more than one GEOGRAPHY columns, which one to use - to construct a geopandas GeoDataFrame. This option can be ommitted - if there's only one GEOGRAPHY column. + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. Returns: - geopandas.GeoDataFrame: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. + `geopandas.GeoDataFrame`: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the + If the :mod:`geopandas` library cannot be imported, or the :mod:`google.cloud.bigquery_storage_v1` module is required but cannot be imported. - - .. versionadded:: ??? - + .. 
versionadded:: 2.24.0 """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) From e9cc38f91830a09772a84888ebb7e95ca6a50e0a Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 11:09:52 -0400 Subject: [PATCH 13/40] Updated dataframe_to_bq_schema to recognize geography data --- google/cloud/bigquery/_pandas_helpers.py | 15 ++++++++++++++ tests/unit/test__pandas_helpers.py | 25 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..89aef4422 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -27,6 +27,11 @@ except ImportError: # pragma: NO COVER pandas = None +try: + from shapely.geometry.base import BaseGeometry +except ImportError: # pragma: NO COVER + BaseGeometry = type(None) + try: import pyarrow import pyarrow.parquet @@ -71,6 +76,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -328,6 +334,15 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + column_data = dataframe[column] + first_valid_index = column_data.first_valid_index() + if first_valid_index is not None: + sample_data = column_data.at[first_valid_index] + if (isinstance(sample_data, BaseGeometry) + and not sample_data is None # Paranoia + ): + bq_type = 'GEOGRAPHY' bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..a48aecbf6 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -36,6 +36,11 @@ # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. 
pyarrow = mock.Mock() +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest import pytz @@ -1175,6 +1180,26 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict(name=['foo', 'bar'], + geo1=[None, None], + geo2=[None, wkt.loads('Point(1 1)')]) + ), + geometry='geo1', + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField('name', 'STRING'), + schema.SchemaField('geo1', 'GEOGRAPHY'), + schema.SchemaField('geo2', 'GEOGRAPHY'), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): From 5f866ff6dbe4c2cf92761baa28cae7ed3f999f3f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 16:34:13 -0400 Subject: [PATCH 14/40] Make load_from_pandas work with GeoSeries, shapely data, and binary wkb data --- google/cloud/bigquery/_pandas_helpers.py | 60 ++++++++++++---- tests/system/test_pandas.py | 91 ++++++++++++++++++++++++ tests/unit/test__pandas_helpers.py | 70 ++++++++++++++++++ 3 files changed, 208 insertions(+), 13 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 89aef4422..a3b872575 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -26,11 +26,23 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy try: - from shapely.geometry.base import BaseGeometry + from shapely.geometry.base import BaseGeometry as _BaseGeometry except ImportError: # pragma: NO 
COVER - BaseGeometry = type(None) + _BaseGeometry = type(None) +else: + if pandas is not None: + def _to_wkb(): + from shapely.geos import WKBWriter, lgeos + write = WKBWriter(lgeos).write + notnull = pandas.notnull + def _to_wkb(v): + return write(v) if notnull(v) else v + return _to_wkb + _to_wkb = _to_wkb() try: import pyarrow @@ -130,6 +142,7 @@ def pyarrow_timestamp(): "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, + "GEOGRAPHY": pyarrow.string, # Unless we have ninary data } ARROW_SCALAR_IDS_TO_BQ = { # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes @@ -208,14 +221,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. """ arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -240,7 +255,24 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == 'GEOGRAPHY': + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == 'geometry' or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapey geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == 'geometry': + # We have a GeoSeries containing all nulls, convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = 
bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -294,6 +326,11 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -335,14 +372,11 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - column_data = dataframe[column] - first_valid_index = column_data.first_valid_index() - if first_valid_index is not None: - sample_data = column_data.at[first_valid_index] - if (isinstance(sample_data, BaseGeometry) - and not sample_data is None # Paranoia + sample_data = _first_valid(dataframe[column]) + if (isinstance(sample_data, _BaseGeometry) + and not sample_data is None # Paranoia ): - bq_type = 'GEOGRAPHY' + bq_type = 'GEOGRAPHY' bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -474,11 +508,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 029001306..d4bcd0f54 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -807,6 +807,7 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + pytest.importorskip("shapely") bigquery_client.query( 
f"create table {dataset_id}.lake (name string, geog geography)" ).result() @@ -831,6 +832,7 @@ def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): def test_to_geodataframe(bigquery_client, dataset_id): + pytest.importorskip("geopandas") bigquery_client.query( f"create table {dataset_id}.geolake (name string, geog geography)" ).result() @@ -858,3 +860,92 @@ def test_to_geodataframe(bigquery_client, dataset_id): "POINT (0 0)", ] assert str(df.area) == ("0 0.5\n" "1 NaN\n" "2 0.0\n" "dtype: float64") + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict(name=['foo', 'bar'], + geo1=[None, None], + geo2=[None, wkt.loads('Point(1 1)')]) + ), + geometry='geo1', + ) + + table_id = f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField('name', 'STRING', 'NULLABLE'), + SchemaField('geo1', 'GEOGRAPHY', 'NULLABLE'), + SchemaField('geo2', 'GEOGRAPHY', 'NULLABLE'), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ['bar', None, 'POINT(1 1)'], + ['foo', None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + pandas = pytest.importorskip("pandas") + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=['foo', 'bar'], + geo=[None, wkt.loads('Point(1 1)')]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField('name', 'STRING', 'NULLABLE'), + SchemaField('geo', 'GEOGRAPHY', 'NULLABLE'), + ] + assert sorted(map(list, 
bigquery_client.list_rows(table_id))) == [ + ['bar', 'POINT(1 1)'], + ['foo', None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ['bar', 'POINT(1 1)'], + ['bar', 'POINT(1 1)'], + ['foo', None], + ['foo', None], + ] + + +def test_load_dataframe_w_wkb(bigquery_client, dataset_id): + pandas = pytest.importorskip("pandas") + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=['foo', 'bar'], + geo=[None, wkb.dumps(wkt.loads('Point(1 1)'))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query(f"create table {table_id} (name string, geo GEOGRAPHY)") + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField('name', 'STRING', 'NULLABLE'), + SchemaField('geo', 'GEOGRAPHY', 'NULLABLE'), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ['bar', 'POINT(1 1)'], + ['foo', None], + ] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index a48aecbf6..f66886804 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -604,6 +604,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads('point(0 0)')]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because 
we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads('point(0 0)')]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads('point(0 0)'))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( @@ -1602,3 +1656,19 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def 
test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert module_under_test.bq_to_arrow_field( + schema.SchemaField('g', 'GEOGRAPHY') + ).type == pyarrow.string() + + assert module_under_test.bq_to_arrow_field( + schema.SchemaField('g', 'GEOGRAPHY'), + pyarrow.binary(), + ).type == pyarrow.binary() From e23b83012b37f7960034210174d4f4be5fa22486 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 16:43:49 -0400 Subject: [PATCH 15/40] Small (14% for small polygons) optimization for reading wkt data --- google/cloud/bigquery/table.py | 10 ++++++---- tests/unit/test_table.py | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b626defb1..ee5d615a3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -38,9 +38,11 @@ CRS = "EPSG:4326" try: - from shapely import wkt + import shapely.geos except ImportError: - wkt = None + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read try: import pyarrow @@ -1941,7 +1943,7 @@ def to_dataframe( """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) - if geography_as_object and wkt is None: + if geography_as_object and shapely is None: raise ValueError(_NO_SHAPELY_ERROR) if dtypes is None: @@ -1987,7 +1989,7 @@ def to_dataframe( if geography_as_object: for field in self.schema: if field.field_type.upper() == "GEOGRAPHY": - df[field.name] = df[field.name].dropna().apply(wkt.loads) + df[field.name] = 
df[field.name].dropna().apply(_read_wkt) return df diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 7c0057404..b8bb62faf 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3032,8 +3032,8 @@ def test_to_dataframe_error_if_pandas_is_none(self): row_iterator.to_dataframe() @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.wkt", new=None) - def test_to_dataframe_error_if_shapely_wkt_is_none(self): + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): with self.assertRaisesRegex( ValueError, re.escape( @@ -3945,7 +3945,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): variations, which are tested for to_dataframe. """ import numpy - import shapely + from shapely import wkt row_iterator = self._make_one_from_data( (("name", "STRING"), ("g", "GEOGRAPHY")) @@ -3958,7 +3958,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): geography_column = "g" to_dataframe.return_value = pandas.DataFrame( - dict(name=["foo"], g=[shapely.wkt.loads("point(0 0)")],) + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) ) df = row_iterator.to_geodataframe( From 950d957428afef407b9c7d3e927a3aad112d09ab Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 16:48:35 -0400 Subject: [PATCH 16/40] blacken/lint --- google/cloud/bigquery/_pandas_helpers.py | 22 +++++---- google/cloud/bigquery/job/query.py | 1 + tests/system/test_pandas.py | 58 ++++++++++++------------ tests/unit/test__pandas_helpers.py | 43 ++++++++++-------- 4 files changed, 68 insertions(+), 56 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index a3b872575..636167511 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -35,13 +35,18 @@ _BaseGeometry = type(None) else: if pandas is not None: + def _to_wkb(): from 
shapely.geos import WKBWriter, lgeos + write = WKBWriter(lgeos).write notnull = pandas.notnull + def _to_wkb(v): return write(v) if notnull(v) else v + return _to_wkb + _to_wkb = _to_wkb() try: @@ -142,7 +147,6 @@ def pyarrow_timestamp(): "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, - "GEOGRAPHY": pyarrow.string, # Unless we have ninary data } ARROW_SCALAR_IDS_TO_BQ = { # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes @@ -255,17 +259,17 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - if bq_field.field_type.upper() == 'GEOGRAPHY': + if bq_field.field_type.upper() == "GEOGRAPHY": arrow_type = None first = _first_valid(series) if first is not None: - if series.dtype.name == 'geometry' or isinstance(first, _BaseGeometry): + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): arrow_type = pyarrow.binary() # Convert shapey geometry to WKB binary format: series = series.apply(_to_wkb) elif isinstance(first, bytes): arrow_type = pyarrow.binary() - elif series.dtype.name == 'geometry': + elif series.dtype.name == "geometry": # We have a GeoSeries containing all nulls, convert it to a pandas series series = pandas.Series(numpy.array(series)) @@ -331,6 +335,7 @@ def _first_valid(series): if first_valid_index is not None: return series.at[first_valid_index] + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. 
@@ -373,10 +378,11 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: sample_data = _first_valid(dataframe[column]) - if (isinstance(sample_data, _BaseGeometry) - and not sample_data is None # Paranoia - ): - bq_type = 'GEOGRAPHY' + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index c3c280462..07e88dd60 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index d4bcd0f54..d79553c7e 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -870,25 +870,27 @@ def test_load_geodataframe(bigquery_client, dataset_id): df = geopandas.GeoDataFrame( pandas.DataFrame( - dict(name=['foo', 'bar'], - geo1=[None, None], - geo2=[None, wkt.loads('Point(1 1)')]) - ), - geometry='geo1', - ) + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) table_id = f"{dataset_id}.lake_from_gp" bigquery_client.load_table_from_dataframe(df, table_id).result() table = bigquery_client.get_table(table_id) assert table.schema == [ - SchemaField('name', 'STRING', 'NULLABLE'), - SchemaField('geo1', 'GEOGRAPHY', 'NULLABLE'), - SchemaField('geo2', 'GEOGRAPHY', 'NULLABLE'), + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", 
"NULLABLE"), ] assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ - ['bar', None, 'POINT(1 1)'], - ['foo', None, None], + ["bar", None, "POINT(1 1)"], + ["foo", None, None], ] @@ -898,29 +900,28 @@ def test_load_dataframe_w_shapely(bigquery_client, dataset_id): from google.cloud.bigquery.schema import SchemaField df = pandas.DataFrame( - dict(name=['foo', 'bar'], - geo=[None, wkt.loads('Point(1 1)')]) - ) + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) table_id = f"{dataset_id}.lake_from_shapes" bigquery_client.load_table_from_dataframe(df, table_id).result() table = bigquery_client.get_table(table_id) assert table.schema == [ - SchemaField('name', 'STRING', 'NULLABLE'), - SchemaField('geo', 'GEOGRAPHY', 'NULLABLE'), + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), ] assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ - ['bar', 'POINT(1 1)'], - ['foo', None], + ["bar", "POINT(1 1)"], + ["foo", None], ] bigquery_client.load_table_from_dataframe(df, table_id).result() assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ - ['bar', 'POINT(1 1)'], - ['bar', 'POINT(1 1)'], - ['foo', None], - ['foo', None], + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], ] @@ -931,9 +932,8 @@ def test_load_dataframe_w_wkb(bigquery_client, dataset_id): from google.cloud.bigquery.schema import SchemaField df = pandas.DataFrame( - dict(name=['foo', 'bar'], - geo=[None, wkb.dumps(wkt.loads('Point(1 1)'))]) - ) + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) table_id = f"{dataset_id}.lake_from_wkb" # We create the table first, to inform the interpretation of the wkb data @@ -942,10 +942,10 @@ def test_load_dataframe_w_wkb(bigquery_client, dataset_id): table = bigquery_client.get_table(table_id) assert table.schema == [ - SchemaField('name', 'STRING', 'NULLABLE'), - SchemaField('geo', 'GEOGRAPHY', 'NULLABLE'), + 
SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), ] assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ - ['bar', 'POINT(1 1)'], - ['foo', None], + ["bar", "POINT(1 1)"], + ["foo", None], ] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index f66886804..e3635e52e 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -611,7 +611,7 @@ def test_bq_to_arrow_array_w_geography_dtype(module_under_test): bq_field = schema.SchemaField("field_name", "GEOGRAPHY") - series = geopandas.GeoSeries([None, wkt.loads('point(0 0)')]) + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) array = module_under_test.bq_to_arrow_array(series, bq_field) # The result is binary, because we use wkb format assert array.type == pyarrow.binary() @@ -631,7 +631,7 @@ def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): bq_field = schema.SchemaField("field_name", "GEOGRAPHY") - series = pandas.Series([None, wkt.loads('point(0 0)')]) + series = pandas.Series([None, wkt.loads("point(0 0)")]) array = module_under_test.bq_to_arrow_array(series, bq_field) # The result is binary, because we use wkb format assert array.type == pyarrow.binary() @@ -651,7 +651,7 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): bq_field = schema.SchemaField("field_name", "GEOGRAPHY") - series = pandas.Series([None, wkb.dumps(wkt.loads('point(0 0)'))]) + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) array = module_under_test.bq_to_arrow_array(series, bq_field) # The result is binary, because we use wkb format assert array.type == pyarrow.binary() @@ -1240,17 +1240,19 @@ def test_dataframe_to_bq_schema_geography(module_under_test): df = geopandas.GeoDataFrame( pandas.DataFrame( - dict(name=['foo', 'bar'], - geo1=[None, None], - geo2=[None, wkt.loads('Point(1 1)')]) - ), - geometry='geo1', - ) + dict( + name=["foo", 
"bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) bq_schema = module_under_test.dataframe_to_bq_schema(df, []) assert bq_schema == ( - schema.SchemaField('name', 'STRING'), - schema.SchemaField('geo1', 'GEOGRAPHY'), - schema.SchemaField('geo2', 'GEOGRAPHY'), + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), ) @@ -1664,11 +1666,14 @@ def test_bq_to_arrow_field_type_override(module_under_test): # decision based on data contents, because GEOGRAPHY data can be # stored as either text or binary. - assert module_under_test.bq_to_arrow_field( - schema.SchemaField('g', 'GEOGRAPHY') - ).type == pyarrow.string() + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) - assert module_under_test.bq_to_arrow_field( - schema.SchemaField('g', 'GEOGRAPHY'), - pyarrow.binary(), - ).type == pyarrow.binary() + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) From 0493acfa1349309c151775a0fba5efc7572854a1 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 4 Aug 2021 17:13:58 -0400 Subject: [PATCH 17/40] wait for it ... 
--- tests/system/test_pandas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index d79553c7e..4508302df 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -937,7 +937,8 @@ def test_load_dataframe_w_wkb(bigquery_client, dataset_id): table_id = f"{dataset_id}.lake_from_wkb" # We create the table first, to inform the interpretation of the wkb data - bigquery_client.query(f"create table {table_id} (name string, geo GEOGRAPHY)") + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)").result() bigquery_client.load_table_from_dataframe(df, table_id).result() table = bigquery_client.get_table(table_id) From 5d986be6616e0b6421436f7a3b531075cd3ff681 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 09:17:36 -0400 Subject: [PATCH 18/40] geopandas needs a slightly newer (but still old) version of pandas. --- setup.py | 4 ++-- testing/constraints-3.6.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index d01f3dcf6..b79cc2940 100644 --- a/setup.py +++ b/setup.py @@ -56,8 +56,8 @@ "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 1.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], - "geopandas": ["geopandas>=0.6.0"], + "pandas": ["pandas>=0.24.2", "pyarrow >= 1.0.0, < 6.0dev"], + "geopandas": ["geopandas>=0.6.0, <1.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..8d119d5ea 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -13,7 +13,7 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 From f5e2f621c1f37036df555907f616bdd23b5f09ae Mon Sep 17 00:00:00 
2001 From: Jim Fulton Date: Thu, 5 Aug 2021 09:19:35 -0400 Subject: [PATCH 19/40] skip coverage on section of code that isn't reachable by tests --- google/cloud/bigquery/_pandas_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 636167511..9e3021ed0 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -34,7 +34,7 @@ except ImportError: # pragma: NO COVER _BaseGeometry = type(None) else: - if pandas is not None: + if pandas is not None: # pragma: NO COVER def _to_wkb(): from shapely.geos import WKBWriter, lgeos From 1084c4e51d2d6fbe3818c7bab608cfe1b8626765 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 09:42:03 -0400 Subject: [PATCH 20/40] assuage pytype --- google/cloud/bigquery/table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index ee5d615a3..09ac44787 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -67,6 +67,7 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. 
import pandas + import geopandas import pyarrow from google.cloud import bigquery_storage From cdb821d6fa3b7cdad0475fba4c56611badbcfb8d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 09:51:56 -0400 Subject: [PATCH 21/40] blacken --- tests/system/test_pandas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 4508302df..0477799ff 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -938,7 +938,8 @@ def test_load_dataframe_w_wkb(bigquery_client, dataset_id): table_id = f"{dataset_id}.lake_from_wkb" # We create the table first, to inform the interpretation of the wkb data bigquery_client.query( - f"create table {table_id} (name string, geo GEOGRAPHY)").result() + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() bigquery_client.load_table_from_dataframe(df, table_id).result() table = bigquery_client.get_table(table_id) From 054a85761dba1c149742f8dcba1dccd2cd1228cf Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 10:34:07 -0400 Subject: [PATCH 22/40] Added geopandas doc snippit --- docs/snippets.py | 24 ++++++++++++++++++++++++ docs/usage/pandas.rst | 15 +++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..e3a6746dc 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -30,6 +30,10 @@ import pandas except (ImportError, AttributeError): pandas = None +try: + import geopandas +except (ImportError, AttributeError): + geopandas = None try: import pyarrow except (ImportError, AttributeError): @@ -934,5 +938,25 @@ def test_list_rows_as_dataframe(client): assert len(df) == table.num_rows # verify the number of rows +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_query_results_as_geodataframe(client): + # [START bigquery_query_results_geodataframe] + # from google.cloud import bigquery + # client = bigquery.Client() + + sql 
= """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows + + if __name__ == "__main__": pytest.main() diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..658ee627c 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] + +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. 
literalinclude:: ../snippets.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 0924a034c69ac5eb9207680fe4fcce679f413566 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 10:52:09 -0400 Subject: [PATCH 23/40] make sure coordinate system is set correctly --- google/cloud/bigquery/table.py | 2 +- tests/system/test_pandas.py | 4 ++++ tests/unit/test_table.py | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 09ac44787..1bc3574b3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2208,7 +2208,7 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame() + return geopandas.GeoDataFrame(crs=CRS) def to_dataframe_iterable( self, diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 0477799ff..4324fa519 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -860,6 +860,10 @@ def test_to_geodataframe(bigquery_client, dataset_id): "POINT (0 0)", ] assert str(df.area) == ("0 0.5\n" "1 NaN\n" "2 0.0\n" "dtype: float64") + assert df.crs.srs == 'EPSG:4326' + assert df.crs.name == 'WGS 84' + assert df.geog.crs.srs == 'EPSG:4326' + assert df.geog.crs.name == 'WGS 84' def test_load_geodataframe(bigquery_client, dataset_id): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b8bb62faf..b9b44ce7c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1689,6 +1689,8 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, 
'EPSG:4326') + self.assertEqual(df.crs.name, 'WGS 84') class TestRowIterator(unittest.TestCase): @@ -3853,6 +3855,10 @@ def test_to_geodataframe(self): self.assertIsInstance(df.geog, geopandas.GeoSeries) self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, 'EPSG:4326') + self.assertEqual(df.crs.name, 'WGS 84') + self.assertEqual(df.geog.crs.srs, 'EPSG:4326') + self.assertEqual(df.geog.crs.name, 'WGS 84') @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_ambiguous_geog(self): From c9f8ab6798633d2cebe7fa4bdbc941c93ff0b4b7 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 11:02:32 -0400 Subject: [PATCH 24/40] We need at least geopandas 0.9.0, which happens to be the current version --- setup.py | 2 +- testing/constraints-3.6.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b79cc2940..174962625 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "pyarrow >= 1.0.0, < 6.0dev", ], "pandas": ["pandas>=0.24.2", "pyarrow >= 1.0.0, < 6.0dev"], - "geopandas": ["geopandas>=0.6.0, <1.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 8d119d5ea..b4aa8cf3a 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +geopandas=0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 From b61bebc75a794983185659f60212062dd357d8cb Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 11:11:59 -0400 Subject: [PATCH 25/40] Set version bounds for shapely and fixed constraint for geopandas --- setup.py | 2 +- 
testing/constraints-3.6.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 174962625..62e926b76 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "pyarrow >= 1.0.0, < 6.0dev", ], "pandas": ["pandas>=0.24.2", "pyarrow >= 1.0.0, < 6.0dev"], - "geopandas": ["geopandas>=0.9.0, <1.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "shapely>=1.6.0, <2.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index b4aa8cf3a..7571817d0 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -geopandas=0.9.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -19,5 +19,6 @@ proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 requests==2.18.0 +shapely==1.6.0 six==1.13.0 tqdm==4.7.4 From edbea72ae173060821de85fc0a6e3b8a4c4eeab0 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 5 Aug 2021 12:07:50 -0400 Subject: [PATCH 26/40] blacken --- tests/system/test_pandas.py | 8 ++++---- tests/unit/test_table.py | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 4324fa519..48233506d 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -860,10 +860,10 @@ def test_to_geodataframe(bigquery_client, dataset_id): "POINT (0 0)", ] assert str(df.area) == ("0 0.5\n" "1 NaN\n" "2 0.0\n" "dtype: float64") - assert df.crs.srs == 'EPSG:4326' - assert df.crs.name == 'WGS 84' - assert df.geog.crs.srs == 'EPSG:4326' - assert df.geog.crs.name == 'WGS 84' + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" def 
test_load_geodataframe(bigquery_client, dataset_id): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b9b44ce7c..8965fff18 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1689,8 +1689,8 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(df.crs.srs, 'EPSG:4326') - self.assertEqual(df.crs.name, 'WGS 84') + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") class TestRowIterator(unittest.TestCase): @@ -3855,10 +3855,10 @@ def test_to_geodataframe(self): self.assertIsInstance(df.geog, geopandas.GeoSeries) self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) - self.assertEqual(df.crs.srs, 'EPSG:4326') - self.assertEqual(df.crs.name, 'WGS 84') - self.assertEqual(df.geog.crs.srs, 'EPSG:4326') - self.assertEqual(df.geog.crs.name, 'WGS 84') + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") @unittest.skipIf(geopandas is None, "Requires `geopandas`") def test_to_geodataframe_ambiguous_geog(self): From b6f19c998014b195a888523d2a60f3119a25cf8d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 16 Aug 2021 15:32:50 -0400 Subject: [PATCH 27/40] rename CRS to _COORDINATE_REFERENCE_SYSTEM --- google/cloud/bigquery/table.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 1bc3574b3..bf1943b14 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -35,7 +35,7 @@ except ImportError: geopandas = None else: - CRS = "EPSG:4326" + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: import shapely.geos @@ 
-2113,7 +2113,9 @@ def to_geodataframe( geography_as_object=True, ) - return geopandas.GeoDataFrame(df, crs=CRS, geometry=geography_column) + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) class _EmptyRowIterator(RowIterator): @@ -2208,7 +2210,7 @@ def to_geodataframe( """ if geopandas is None: raise ValueError(_NO_GEOPANDAS_ERROR) - return geopandas.GeoDataFrame(crs=CRS) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) def to_dataframe_iterable( self, From 30435857e308e353d3851292deff8f97ccdde2cd Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 16 Aug 2021 15:58:55 -0400 Subject: [PATCH 28/40] Fixed intersphinx links --- docs/conf.py | 2 ++ google/cloud/bigquery/job/query.py | 4 ++-- google/cloud/bigquery/table.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..4249a8281 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -365,6 +365,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ('http://pandas.pydata.org/pandas-docs/dev', None), + "geopandas": ("https://geopandas.org/", None) } diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 07e88dd60..11cec75bd 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1484,7 +1484,7 @@ def to_dataframe( .. versionadded:: 2.24.0 Returns: - `pandas.DataFrame`: + pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived from the destination table's @@ -1578,7 +1578,7 @@ def to_geodataframe( only one GEOGRAPHY column. 
Returns: - `geopandas.GeoDataFrame`: + geopandas.GeoDataFrame: A :class:`geopandas.GeoDataFrame` populated with row data and column headers from the query results. The column headers are derived from the destination diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c9cb844b8..807dc1888 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1966,7 +1966,7 @@ def to_dataframe( .. versionadded:: 2.24.0 Returns: - `pandas.DataFrame`: + pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived from the destination table's schema. @@ -2099,7 +2099,7 @@ def to_geodataframe( only one GEOGRAPHY column. Returns: - `geopandas.GeoDataFrame`: + geopandas.GeoDataFrame: A :class:`geopandas.GeoDataFrame` populated with row data and column headers from the query results. The column headers are derived from the destination From fe8ea8a2a0ca8a1ab47157e3ee5d1497de4d005b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 16 Aug 2021 16:06:20 -0400 Subject: [PATCH 29/40] capitalize shapely --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 62e926b76..37d72bf4f 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "pyarrow >= 1.0.0, < 6.0dev", ], "pandas": ["pandas>=0.24.2", "pyarrow >= 1.0.0, < 6.0dev"], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "shapely>=1.6.0, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ From 2d3c37212f28fcee93bce3605a7f09dfbd96ef49 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 09:40:16 -0400 Subject: [PATCH 30/40] blacken ...whimper --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1958b6fce..59a2d8fb3 100644 --- a/docs/conf.py 
+++ b/docs/conf.py @@ -366,8 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ('http://pandas.pydata.org/pandas-docs/dev', None), - "geopandas": ("https://geopandas.org/", None) + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } From 1a977fb327594c64c9c78f6fd3dfae8f3bc563de Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 17 Aug 2021 18:07:12 +0000 Subject: [PATCH 31/40] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- docs/conf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 59a2d8fb3..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,8 +366,6 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), - "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), - "geopandas": ("https://geopandas.org/", None), } From 7dd9f5edddd1c277700aa0378ae902939b9d82a4 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 14:23:48 -0400 Subject: [PATCH 32/40] tell owlbot about pandas and geopandas intersphinx --- docs/conf.py | 2 ++ owlbot.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..59a2d8fb3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": 
("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/owlbot.py b/owlbot.py index 09845480a..ea9904cdb 100644 --- a/owlbot.py +++ b/owlbot.py @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "geopandas": "https://geopandas.org/", + } ) # BigQuery has a custom multiprocessing note From 3c257d9940f7f741826ab20f97d7fda160a034ef Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 15:21:31 -0400 Subject: [PATCH 33/40] Explain the _to_wkb hijinks --- google/cloud/bigquery/_pandas_helpers.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 4ed9cd279..f72bf0731 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -35,6 +35,12 @@ if pandas is not None: # pragma: NO COVER def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. + # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. 
from shapely.geos import WKBWriter, lgeos write = WKBWriter(lgeos).write From c73b85683372a322ad957b2af7fa2767f7fbd68b Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 15:37:32 -0400 Subject: [PATCH 34/40] moar comments --- google/cloud/bigquery/_pandas_helpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index f72bf0731..ab58b1729 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -28,8 +28,10 @@ import numpy try: + # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array` from shapely.geometry.base import BaseGeometry as _BaseGeometry except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. _BaseGeometry = type(None) else: if pandas is not None: # pragma: NO COVER From 5988052944a034c1a2ecd586bcd6eaa32aa4e9e5 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 16:24:26 -0400 Subject: [PATCH 35/40] Don't rely on string representations in assertions --- tests/system/test_pandas.py | 39 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 3a210b3a2..febb6c97f 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -801,7 +801,7 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): - pytest.importorskip("shapely") + wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( f"create table {dataset_id}.lake (name string, geog geography)" ).result() @@ -816,17 +816,15 @@ def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): df = bigquery_client.query( f"select * from {dataset_id}.lake order by name" ).to_dataframe(geography_as_object=True) - assert str(df) == ( - " name geog\n" - "0 bar POINT (0 
1)\n" - "1 baz NaN\n" - "2 foo POINT (0 0)" - ) - assert [v.__class__.__name__ for v in df["geog"]] == ["Point", "float", "Point"] + assert list(df['name']) == ['bar', 'baz', 'foo'] + assert df['geog'][0] == wkt.loads('point(0 1)') + assert pandas.isna(df['geog'][1]) + assert df['geog'][2] == wkt.loads('point(0 0)') def test_to_geodataframe(bigquery_client, dataset_id): - pytest.importorskip("geopandas") + geopandas = pytest.importorskip("geopandas") + from shapely import wkt bigquery_client.query( f"create table {dataset_id}.geolake (name string, geog geography)" ).result() @@ -834,26 +832,21 @@ def test_to_geodataframe(bigquery_client, dataset_id): f""" insert into {dataset_id}.geolake (name, geog) values ('foo', st_geogfromtext('point(0 0)')), - ('bar', st_geogfromtext('polygon((0 0, 1 1, 1 0, 0 0))')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), ('baz', null) """ ).result() df = bigquery_client.query( f"select * from {dataset_id}.geolake order by name" ).to_geodataframe() - assert [v.__class__.__name__ for v in df["geog"]] == [ - "Polygon", - "NoneType", - "Point", - ] - assert df.__class__.__name__ == "GeoDataFrame" - assert df["geog"].__class__.__name__ == "GeoSeries" - assert list(map(str, df["geog"])) == [ - "POLYGON ((1 0, 1 1, 0 0, 1 0))", - "None", - "POINT (0 0)", - ] - assert str(df.area) == ("0 0.5\n" "1 NaN\n" "2 0.0\n" "dtype: float64") + assert df["geog"][0] == wkt.loads('polygon((0 0, 1 0, 1 1, 0 0))') + assert pandas.isna(df['geog'][1]) + assert df["geog"][2] == wkt.loads('point(0 0)') + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 assert df.crs.srs == "EPSG:4326" assert df.crs.name == "WGS 84" assert df.geog.crs.srs == "EPSG:4326" From 3207e578638766da7e3fa48ea0be72db7ab39fc7 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 17:26:37 -0400 Subject: [PATCH 36/40] move 
to_geodataframe snippet to samples --- docs/snippets.py | 24 ------------- docs/usage/pandas.rst | 2 +- samples/geography/requirements.txt | 44 +++++++++++++++++++++++ samples/geography/to_geodataframe.py | 32 +++++++++++++++++ samples/geography/to_geodataframe_test.py | 25 +++++++++++++ 5 files changed, 102 insertions(+), 25 deletions(-) create mode 100644 samples/geography/to_geodataframe.py create mode 100644 samples/geography/to_geodataframe_test.py diff --git a/docs/snippets.py b/docs/snippets.py index dd1f07e80..c62001fc0 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -30,10 +30,6 @@ import pandas except (ImportError, AttributeError): pandas = None -try: - import geopandas -except (ImportError, AttributeError): - geopandas = None try: import pyarrow except (ImportError, AttributeError): @@ -939,25 +935,5 @@ def test_list_rows_as_dataframe(client): assert len(df) == table.num_rows # verify the number of rows -@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_query_results_as_geodataframe(client): - # [START bigquery_query_results_geodataframe] - # from google.cloud import bigquery - # client = bigquery.Client() - - sql = """ - SELECT created_date, complaint_description, - ST_GEOGPOINT(longitude, latitude) as location - FROM bigquery-public-data.austin_311.311_service_requests - LIMIT 10 - """ - - df = client.query(sql).to_geodataframe() - # [END bigquery_query_results_geodataframe] - assert isinstance(df, geopandas.GeoDataFrame) - assert len(list(df)) == 3 # verify the number of columns - assert len(df) == 10 # verify the number of rows - - if __name__ == "__main__": pytest.main() diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 658ee627c..92eee67cf 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -45,7 +45,7 @@ Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame capabilities to Pandas. To retrieve query results containing GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: -.. 
literalinclude:: ../snippets.py +.. literalinclude:: ../samples/geography/to_geodataframe.py :language: python :dedent: 4 :start-after: [START bigquery_query_results_geodataframe] diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..18a0e921f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,48 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.8 +Fiona==1.8.20 geojson==2.5.0 +geopandas==0.9.0 +google-api-core==1.31.2 +google-auth==1.35.0 google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 +google-cloud-core==1.7.2 +google-crc32c==1.1.2 +google-resumable-media==1.3.3 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5 +packaging==21.0 +pandas==1.1.5 +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows From 27071a09546a81079e85ac7375c204e80a93f837 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 17 Aug 2021 17:33:30 -0400 Subject: [PATCH 37/40] Removed pointless importskips --- tests/system/test_pandas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index febb6c97f..fadf6d684 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -886,7 +886,6 @@ def test_load_geodataframe(bigquery_client, dataset_id): def test_load_dataframe_w_shapely(bigquery_client, dataset_id): - pandas = pytest.importorskip("pandas") wkt = pytest.importorskip("shapely.wkt") from google.cloud.bigquery.schema import SchemaField @@ -917,7 +916,6 @@ def test_load_dataframe_w_shapely(bigquery_client, dataset_id): def test_load_dataframe_w_wkb(bigquery_client, dataset_id): - pandas = pytest.importorskip("pandas") wkt = pytest.importorskip("shapely.wkt") from shapely import wkb from google.cloud.bigquery.schema import SchemaField From abf473e6c2539e48960c876d60e49bb1d0459197 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 18 Aug 2021 08:29:47 -0400 Subject: [PATCH 38/40] blacken --- tests/system/test_pandas.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index fadf6d684..836f93210 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -816,15 +816,16 @@ def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): df = bigquery_client.query( f"select * from {dataset_id}.lake order 
by name" ).to_dataframe(geography_as_object=True) - assert list(df['name']) == ['bar', 'baz', 'foo'] - assert df['geog'][0] == wkt.loads('point(0 1)') - assert pandas.isna(df['geog'][1]) - assert df['geog'][2] == wkt.loads('point(0 0)') + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") def test_to_geodataframe(bigquery_client, dataset_id): geopandas = pytest.importorskip("geopandas") from shapely import wkt + bigquery_client.query( f"create table {dataset_id}.geolake (name string, geog geography)" ).result() @@ -839,9 +840,9 @@ def test_to_geodataframe(bigquery_client, dataset_id): df = bigquery_client.query( f"select * from {dataset_id}.geolake order by name" ).to_geodataframe() - assert df["geog"][0] == wkt.loads('polygon((0 0, 1 0, 1 1, 0 0))') - assert pandas.isna(df['geog'][1]) - assert df["geog"][2] == wkt.loads('point(0 0)') + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") assert isinstance(df, geopandas.GeoDataFrame) assert isinstance(df["geog"], geopandas.GeoSeries) assert df.area[0] == 0.5 From 88b0653ec9a6b46b9ffc4944a467572d11d4adde Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 18 Aug 2021 08:34:09 -0400 Subject: [PATCH 39/40] use older dataclasses to try to deal with pip error in CI --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 18a0e921f..b1e1f3b75 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,7 +6,7 @@ charset-normalizer==2.0.4 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.8 +dataclasses==0.6 Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0 From 1b6b1d76b08061b6b3128c201f43668b62251e9c Mon Sep 17 00:00:00 2001 
From: Jim Fulton Date: Mon, 23 Aug 2021 14:59:19 -0600 Subject: [PATCH 40/40] Don't need dataclasses after Python 3.6 It's built in. --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3b902da80..7a76b4033 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,7 +6,7 @@ charset-normalizer==2.0.4 click==8.0.1 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.6 +dataclasses==0.6; python_version < '3.7' Fiona==1.8.20 geojson==2.5.0 geopandas==0.9.0