Skip to content

Commit

Permalink
feat: we can pass extra json.dumps args when using load_table_from_json
Browse files Browse the repository at this point in the history
  • Loading branch information
HCA97 committed Aug 7, 2023
1 parent 0686848 commit da90fcb
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 2 deletions.
12 changes: 10 additions & 2 deletions google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2759,6 +2759,7 @@ def load_table_from_json(
project: Optional[str] = None,
job_config: Optional[LoadJobConfig] = None,
timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
json_dumps_kwargs: Optional[Dict[str, Any]] = None,
) -> job.LoadJob:
"""Upload the contents of a table from a JSON string or dict.
Expand Down Expand Up @@ -2811,7 +2812,8 @@ def load_table_from_json(
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
json_dumps_kwargs:
Extra keyword arguments for ``json.dumps``. Usefull when you want to parse datetime objects.
Returns:
google.cloud.bigquery.job.LoadJob: A new load job.
Expand Down Expand Up @@ -2842,7 +2844,13 @@ def load_table_from_json(

destination = _table_arg_to_table_ref(destination, default_project=self.project)

data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
if json_dumps_kwargs is None:
json_dumps_kwargs = {}

data_str = "\n".join(
json.dumps(item, ensure_ascii=False, **json_dumps_kwargs)
for item in json_rows
)
encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
Expand Down
59 changes: 59 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8665,6 +8665,65 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.CSV

def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self):
    """``json_dumps_kwargs`` is honored and the load is forwarded intact.

    Passes a ``default=`` serializer through ``json_dumps_kwargs`` so that
    rows containing ``datetime.date`` values can be serialized, then checks
    that ``load_table_from_file`` receives the expected arguments and an
    autodetect NDJSON job config.
    """
    from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
    from google.cloud.bigquery import job

    def json_serial(obj):
        """Serialize date/datetime objects to ISO-8601 strings.

        Ref: https://stackoverflow.com/a/22238613
        """
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        raise TypeError("Type %s not serializable" % type(obj))

    client = self._make_client()

    # Rows deliberately contain a non-JSON-serializable value (date) so the
    # call only succeeds if the custom serializer is actually applied.
    people = [
        ("One", 11, datetime.date(2008, 9, 10), False),
        ("Two", 22, datetime.date(1997, 8, 9), True),
    ]
    json_rows = [
        {"name": name, "age": age, "birthday": birthday, "adult": adult}
        for name, age, birthday, adult in people
    ]

    with mock.patch(
        "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
    ) as load_table_from_file:
        client.load_table_from_json(
            json_rows, self.TABLE_REF, json_dumps_kwargs={"default": json_serial}
        )

    load_table_from_file.assert_called_once_with(
        client,
        mock.ANY,
        self.TABLE_REF,
        size=mock.ANY,
        num_retries=_DEFAULT_NUM_RETRIES,
        job_id=mock.ANY,
        job_id_prefix=None,
        location=client.location,
        project=client.project,
        job_config=mock.ANY,
        timeout=DEFAULT_TIMEOUT,
    )

    # Inspect the job config that was actually sent.
    sent_config = load_table_from_file.call_args.kwargs["job_config"]
    assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
    assert sent_config.schema is None
    assert sent_config.autodetect

def test_load_table_from_json_basic_use(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
Expand Down

0 comments on commit da90fcb

Please sign in to comment.