test: refactor list_rows tests and add test for scalars (#829)
* test: refactor `list_rows` tests and add test for scalars

* fix JSON formatting

* add TODO for INTERVAL Arrow support

* format tests
tswast committed Aug 13, 2021
1 parent c44d45b commit e3704c3
Showing 6 changed files with 181 additions and 83 deletions.
4 changes: 2 additions & 2 deletions tests/data/scalars.jsonl
@@ -1,2 +1,2 @@
-{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
-{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
+{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null}
10 changes: 5 additions & 5 deletions tests/data/scalars_extreme.jsonl
@@ -1,5 +1,5 @@
-{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
-{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
-{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
-{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
-{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null}
+{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
+{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
+{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
+{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null}
54 changes: 32 additions & 22 deletions tests/data/scalars_schema.json
@@ -1,33 +1,33 @@
 [
   {
     "mode": "NULLABLE",
-    "name": "timestamp_col",
-    "type": "TIMESTAMP"
+    "name": "bool_col",
+    "type": "BOOLEAN"
   },
   {
     "mode": "NULLABLE",
-    "name": "time_col",
-    "type": "TIME"
+    "name": "bignumeric_col",
+    "type": "BIGNUMERIC"
   },
   {
     "mode": "NULLABLE",
-    "name": "float64_col",
-    "type": "FLOAT"
+    "name": "bytes_col",
+    "type": "BYTES"
   },
   {
     "mode": "NULLABLE",
-    "name": "datetime_col",
-    "type": "DATETIME"
+    "name": "date_col",
+    "type": "DATE"
   },
   {
     "mode": "NULLABLE",
-    "name": "bignumeric_col",
-    "type": "BIGNUMERIC"
+    "name": "datetime_col",
+    "type": "DATETIME"
   },
   {
     "mode": "NULLABLE",
-    "name": "numeric_col",
-    "type": "NUMERIC"
+    "name": "float64_col",
+    "type": "FLOAT"
   },
   {
     "mode": "NULLABLE",
@@ -36,27 +36,37 @@
   },
   {
     "mode": "NULLABLE",
-    "name": "date_col",
-    "type": "DATE"
+    "name": "int64_col",
+    "type": "INTEGER"
   },
   {
     "mode": "NULLABLE",
-    "name": "string_col",
-    "type": "STRING"
+    "name": "interval_col",
+    "type": "INTERVAL"
   },
   {
     "mode": "NULLABLE",
-    "name": "bool_col",
-    "type": "BOOLEAN"
+    "name": "numeric_col",
+    "type": "NUMERIC"
+  },
+  {
+    "mode": "REQUIRED",
+    "name": "rowindex",
+    "type": "INTEGER"
   },
   {
     "mode": "NULLABLE",
-    "name": "bytes_col",
-    "type": "BYTES"
+    "name": "string_col",
+    "type": "STRING"
   },
   {
     "mode": "NULLABLE",
-    "name": "int64_col",
-    "type": "INTEGER"
+    "name": "time_col",
+    "type": "TIME"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "timestamp_col",
+    "type": "TIMESTAMP"
   }
 ]
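Beyond alphabetizing the fields, the schema gains two entries. For reference, they correspond to the following SchemaField objects; a sketch using raw type strings, which SchemaField accepts alongside the enums the tests use:

from google.cloud import bigquery

# The new nullable INTERVAL column and the REQUIRED row-index column that the
# refactored tests sort on.
interval_col = bigquery.SchemaField("interval_col", "INTERVAL", mode="NULLABLE")
rowindex = bigquery.SchemaField("rowindex", "INTEGER", mode="REQUIRED")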
36 changes: 30 additions & 6 deletions tests/system/test_arrow.py
@@ -14,8 +14,14 @@
 
 """System tests for Arrow connector."""
 
+from typing import Optional
+
 import pytest
 
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
 pyarrow = pytest.importorskip(
     "pyarrow", minversion="3.0.0"
 )  # Needs decimal256 for BIGNUMERIC columns.
@@ -31,17 +37,35 @@
     ),
 )
 def test_list_rows_nullable_scalars_dtypes(
-    bigquery_client,
-    scalars_table,
-    scalars_extreme_table,
-    max_results,
-    scalars_table_name,
+    bigquery_client: bigquery.Client,
+    scalars_table: str,
+    scalars_extreme_table: str,
+    max_results: Optional[int],
+    scalars_table_name: str,
 ):
     table_id = scalars_table
     if scalars_table_name == "scalars_extreme_table":
         table_id = scalars_extreme_table
 
+    # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
+    # BigQuery Storage API and pyarrow.
+    schema = [
+        bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
+        bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
+        bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
+        bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
+        bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
+        bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
+        bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
+        bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
+        bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
+        bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
+        bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
+        bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
+    ]
+
     arrow_table = bigquery_client.list_rows(
-        table_id, max_results=max_results,
+        table_id, max_results=max_results, selected_fields=schema,
     ).to_arrow()
 
     schema = arrow_table.schema
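The tail of this test is collapsed in the diff above. Dtype assertions on the resulting Arrow schema typically look like the following sketch; check_arrow_dtypes is a hypothetical helper for illustration, not the file's actual assertions:

import pyarrow


def check_arrow_dtypes(arrow_schema: pyarrow.Schema) -> None:
    # Illustrative checks only; the real assertions are not shown in this diff.
    assert pyarrow.types.is_boolean(arrow_schema.field("bool_col").type)
    # decimal256 for BIGNUMERIC is why the module requires pyarrow >= 3.0.0.
    assert pyarrow.types.is_decimal256(arrow_schema.field("bignumeric_col").type)
    assert pyarrow.types.is_timestamp(arrow_schema.field("timestamp_col").type)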
48 changes: 0 additions & 48 deletions tests/system/test_client.py
@@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self):
         self.assertTrue(pyarrow.types.is_list(record_col[1].type))
         self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type))
 
-    def test_list_rows_empty_table(self):
-        from google.cloud.bigquery.table import RowIterator
-
-        dataset_id = _make_dataset_id("empty_table")
-        dataset = self.temp_dataset(dataset_id)
-        table_ref = dataset.table("empty_table")
-        table = Config.CLIENT.create_table(bigquery.Table(table_ref))
-
-        # It's a bit silly to list rows for an empty table, but this does
-        # happen as the result of a DDL query from an IPython magic command.
-        rows = Config.CLIENT.list_rows(table)
-        self.assertIsInstance(rows, RowIterator)
-        self.assertEqual(tuple(rows), ())
-
-    def test_list_rows_page_size(self):
-        from google.cloud.bigquery.job import SourceFormat
-        from google.cloud.bigquery.job import WriteDisposition
-
-        num_items = 7
-        page_size = 3
-        num_pages, num_last_page = divmod(num_items, page_size)
-
-        SF = bigquery.SchemaField
-        schema = [SF("string_col", "STRING", mode="NULLABLE")]
-        to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
-        rows = [json.dumps(row) for row in to_insert]
-        body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))
-
-        table_id = "test_table"
-        dataset = self.temp_dataset(_make_dataset_id("nested_df"))
-        table = dataset.table(table_id)
-        self.to_delete.insert(0, table)
-        job_config = bigquery.LoadJobConfig()
-        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
-        job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
-        job_config.schema = schema
-        # Load a table using a local JSON file from memory.
-        Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()
-
-        df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
-        pages = df.pages
-
-        for i in range(num_pages):
-            page = next(pages)
-            self.assertEqual(page.num_items, page_size)
-        page = next(pages)
-        self.assertEqual(page.num_items, num_last_page)
-
     def temp_dataset(self, dataset_id, location=None):
         project = Config.CLIENT.project
         dataset_ref = bigquery.DatasetReference(project, dataset_id)
112 changes: 112 additions & 0 deletions tests/system/test_list_rows.py
@@ -0,0 +1,112 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import decimal

from google.cloud import bigquery
from google.cloud.bigquery import enums


def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str):
    from google.cloud.bigquery.table import RowIterator

    table = bigquery_client.create_table(table_id)

    # It's a bit silly to list rows for an empty table, but this does
    # happen as the result of a DDL query from an IPython magic command.
    rows = bigquery_client.list_rows(table)
    assert isinstance(rows, RowIterator)
    assert tuple(rows) == ()


def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str):
    num_items = 7
    page_size = 3
    num_pages, num_last_page = divmod(num_items, page_size)

    to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)]
    bigquery_client.load_table_from_json(to_insert, table_id).result()

    df = bigquery_client.list_rows(
        table_id,
        selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)],
        page_size=page_size,
    )
    pages = df.pages

    for i in range(num_pages):
        page = next(pages)
        assert page.num_items == page_size
    page = next(pages)
    assert page.num_items == num_last_page


def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str):
    rows = sorted(
        bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"]
    )
    row = rows[0]
    assert row["bool_col"]  # True
    assert row["bytes_col"] == b"Hello, World!"
    assert row["date_col"] == datetime.date(2021, 7, 21)
    assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
    assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
    assert row["int64_col"] == 123456789
    assert row["numeric_col"] == decimal.Decimal("1.23456789")
    assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
    assert row["float64_col"] == 1.25
    assert row["string_col"] == "Hello, World!"
    assert row["time_col"] == datetime.time(11, 41, 43, 76160)
    assert row["timestamp_col"] == datetime.datetime(
        2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc
    )

    nullrow = rows[1]
    for column, value in nullrow.items():
        if column == "rowindex":
            assert value == 1
        else:
            assert value is None


def test_list_rows_scalars_extreme(
    bigquery_client: bigquery.Client, scalars_extreme_table: str
):
    rows = sorted(
        bigquery_client.list_rows(scalars_extreme_table),
        key=lambda row: row["rowindex"],
    )
    row = rows[0]
    assert row["bool_col"]  # True
    assert row["bytes_col"] == b"\r\n"
    assert row["date_col"] == datetime.date(9999, 12, 31)
    assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
    assert row["geography_col"] == "POINT(-135 90)"
    assert row["int64_col"] == 9223372036854775807
    assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
    assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
    assert row["float64_col"] == float("Inf")
    assert row["string_col"] == "Hello, World"
    assert row["time_col"] == datetime.time(23, 59, 59, 999999)
    assert row["timestamp_col"] == datetime.datetime(
        9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
    )

    nullrow = rows[4]
    for column, value in nullrow.items():
        if column == "rowindex":
            assert value == 4
        else:
            assert value is None

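These tests lean on shared pytest fixtures (bigquery_client, table_id, scalars_table, scalars_extreme_table) defined in a conftest module that is not part of this diff. A rough sketch of their likely shape, stated as an assumption:

import uuid

import pytest

from google.cloud import bigquery


# Sketch of the conftest fixtures the tests above assume; names match the test
# signatures, but the real definitions are not shown in this commit.
@pytest.fixture(scope="session")
def bigquery_client():
    return bigquery.Client()


@pytest.fixture
def table_id(bigquery_client: bigquery.Client):
    # Hypothetical: a random table ID in a test dataset, dropped on teardown.
    table_id = f"my_test_dataset.table_{uuid.uuid4().hex}"
    yield table_id
    bigquery_client.delete_table(table_id, not_found_ok=True)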