From fd0753338e15965347683345b0e51838baf5d9f6 Mon Sep 17 00:00:00 2001 From: Franklin Whaite <70151215+franklinWhaite@users.noreply.github.com> Date: Thu, 3 Mar 2022 12:16:27 -0500 Subject: [PATCH] Fix: adjusting retry logic to avoid retrying successful job creation (#1879) https://github.com/googleapis/java-bigquery/pull/1744 introduced retrying 200 responses, but the regex that we are using is too broad: [".*exceed.*rate.limit."] and in some cases it may catch a valid response. The issue is that the current logic is scanning the whole response. In this PR the retry logic is modified to specifically check the error message from the response. For this purpose parsing logic was developed to extract error messages from responses. This logic is specifically designed for the [jobs.insert method response](https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert#response-body) (only case observed so far where a response with status code 200 might also return an error message). --- .../bigquery/BigQueryRetryAlgorithm.java | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQueryRetryAlgorithm.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQueryRetryAlgorithm.java index af472430f..0429b7f00 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQueryRetryAlgorithm.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/BigQueryRetryAlgorithm.java @@ -25,6 +25,8 @@ import com.google.api.gax.retrying.TimedAttemptSettings; import com.google.api.gax.retrying.TimedRetryAlgorithm; import com.google.api.gax.retrying.TimedRetryAlgorithmWithContext; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; import java.util.Iterator; import java.util.UUID; import java.util.concurrent.CancellationException; @@ -107,9 +109,10 @@ private boolean shouldRetryBasedOnBigQueryRetryConfig( /* In some cases error messages may come without an exception e.g. status code 200 with a rate limit exceeded for job create - in these cases there is now previousThrowable so we need to check previousResponse + in these cases there is no previousThrowable so we need + to check for error messages in previousResponse */ - errorDesc = previousResponse.toString(); + errorDesc = getErrorDescFromResponse(previousResponse); } if (errorDesc != null) { @@ -212,4 +215,28 @@ private TimedAttemptSettings createNextAttemptBasedOnTiming( } return getTimedAlgorithm().createNextAttempt(previousSettings); } + + private String getErrorDescFromResponse(ResponseT previousResponse) { + /* + error messages may come without an exception and must be extracted from response + following logic based on response body of jobs.insert method, so far the only + known case where a response with status code 200 may contain an error message + */ + try { + JsonObject responseJson = + JsonParser.parseString(previousResponse.toString()).getAsJsonObject(); + if (responseJson.has("status") && responseJson.getAsJsonObject("status").has("errorResult")) { + return responseJson + .getAsJsonObject("status") + .getAsJsonObject("errorResult") + .get("message") + .toString(); + } else { + return null; + } + } catch (Exception e) { + // exceptions here implies no error message present in response, returning null + return null; + } + } }