From 96e25a65a4e33c3d822600b28f49cbb448d90277 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Wed, 1 Oct 2025 17:21:22 -0400 Subject: [PATCH 1/2] Refactor to DRY up HTTP handling --- .../commons/importer/FileFetcher.java | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java b/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java index 4cecab8..7c80780 100644 --- a/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java +++ b/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java @@ -43,7 +43,6 @@ public FileFetcher(String apiUrl, String categoryName, boolean subcategories) { */ public void getCallResults(String category) throws IOException { - OkHttpClient client = HttpClient.getClient(); urlBase = HttpUrl.parse(apiUrl).newBuilder() .addQueryParameter("action", "query") .addQueryParameter("list", "categorymembers") @@ -52,9 +51,7 @@ public void getCallResults(String category) throws IOException { .addQueryParameter("cmprop", "title|type|ids") .addQueryParameter("cmlimit", "500") .addQueryParameter("format", "json").build(); - Request request = new Request.Builder().url(urlBase).build(); - Response response = client.newCall(request).execute(); - JsonNode jsonNode = new ObjectMapper().readTree(response.body().string()); + JsonNode jsonNode = getJson(urlBase); callResults = jsonNode.path("query").path("categorymembers"); cmcontinue = jsonNode.path("continue").path("cmcontinue").asText(); @@ -64,17 +61,31 @@ public void getCallResults(String category) throws IOException { * API call when a cmcontinue token is part of the response * @param urlContinue: URL containing the cmcontinue token */ - private void getCallResults(HttpUrl urlContinue) throws IOException { + private void getContinuationResults(HttpUrl urlContinue) throws IOException { - OkHttpClient client = new OkHttpClient.Builder().build(); - Request request = new Request.Builder().url(urlContinue).build(); - Response response = client.newCall(request).execute(); - JsonNode jsonNode = new ObjectMapper().readTree(response.body().string()); + JsonNode jsonNode = getJson(urlContinue); callResults = jsonNode.path("query").path("categorymembers"); cmcontinue = jsonNode.path("continue").path("cmcontinue").asText(); } + private JsonNode getJson(HttpUrl url) throws IOException { + + Request request = new Request.Builder().url(url).build(); + try (Response response = HttpClient.getClient().newCall(request).execute()) { + if (response.isSuccessful()) { + if (response.body() != null) { + return new ObjectMapper().readTree(response.body().string()); + } else { + return new ObjectMapper().readTree("[]"); + } + } else { + throw new IOException("API request failed with status code: " + response.code() + ", body: " + response.message()); + } + } + + } + /** * Internal function used to iterate over the paginated results of the MediaWiki API * when fetching files or categories. @@ -164,7 +175,7 @@ public JsonNode next() { urlContinue = HttpUrl.parse(urlBase.toString()).newBuilder() .addQueryParameter("cmcontinue", cmcontinue).build(); try { - getCallResults(urlContinue); + getContinuationResults(urlContinue); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); From e9b33c824daa7d1ef02de3ff3b8df8d79d7aab28 Mon Sep 17 00:00:00 2001 From: Tom Morris Date: Wed, 1 Oct 2025 17:50:04 -0400 Subject: [PATCH 2/2] Refactor URL building --- .../commons/importer/FileFetcher.java | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java b/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java index 7c80780..5a378a0 100644 --- a/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java +++ b/src/main/java/org/openrefine/extensions/commons/importer/FileFetcher.java @@ -9,7 +9,6 @@ import com.google.common.collect.Iterators; import okhttp3.HttpUrl; -import okhttp3.OkHttpClient; import okhttp3.Request; import okhttp3.Response; @@ -19,9 +18,7 @@ public class FileFetcher implements Iterator{ String apiUrl; String categoryName; - boolean subcategories; - HttpUrl urlBase; - HttpUrl urlContinue; + String cmType; JsonNode callResults; private int indexRow = 0; String cmcontinue; @@ -29,8 +26,9 @@ public class FileFetcher implements Iterator{ public FileFetcher(String apiUrl, String categoryName, boolean subcategories) { this.apiUrl = apiUrl; this.categoryName = categoryName; - this.subcategories = subcategories; + this.cmType = subcategories ? "subcat" : "file"; try { + // TODO: it's weird that this uses categoryName as a parameter, but subcategories implicitly from the field getCallResults(categoryName); } catch (IOException e) { throw new UncheckedIOException(e); @@ -43,18 +41,22 @@ public FileFetcher(String apiUrl, String categoryName, boolean subcategories) { */ public void getCallResults(String category) throws IOException { - urlBase = HttpUrl.parse(apiUrl).newBuilder() + HttpUrl urlBase = buildBaseUrl(category, cmType); + JsonNode jsonNode = getJson(urlBase); + callResults = jsonNode.path("query").path("categorymembers"); + cmcontinue = jsonNode.path("continue").path("cmcontinue").asText(); + + } + + private HttpUrl buildBaseUrl(String category, String cmtype) { + return HttpUrl.parse(apiUrl).newBuilder() .addQueryParameter("action", "query") .addQueryParameter("list", "categorymembers") .addQueryParameter("cmtitle", category) - .addQueryParameter("cmtype", subcategories ? "subcat":"file") + .addQueryParameter("cmtype", cmtype) .addQueryParameter("cmprop", "title|type|ids") .addQueryParameter("cmlimit", "500") .addQueryParameter("format", "json").build(); - JsonNode jsonNode = getJson(urlBase); - callResults = jsonNode.path("query").path("categorymembers"); - cmcontinue = jsonNode.path("continue").path("cmcontinue").asText(); - } /** @@ -172,7 +174,7 @@ public JsonNode next() { indexRow++; if ((indexRow == callResults.size()) && !cmcontinue.isBlank()) { - urlContinue = HttpUrl.parse(urlBase.toString()).newBuilder() + HttpUrl urlContinue = buildBaseUrl(categoryName, cmType).newBuilder() .addQueryParameter("cmcontinue", cmcontinue).build(); try { getContinuationResults(urlContinue);