From 646f440ce1b970cd67f83addc69988ef74bce78d Mon Sep 17 00:00:00 2001 From: Thierry Rondet Date: Sat, 26 Sep 2020 04:06:52 +0200 Subject: [PATCH 1/6] add support for new json object list output format --- README.md | 10 +- .../elasticsearch/df/content/ContentType.java | 22 +++ .../df/content/json/JsonListContent.java | 134 ++++++++++++++++++ .../elasticsearch/df/rest/RestDataAction.java | 3 + .../df/DataFormatPluginTest.java | 105 ++++++++++++++ 5 files changed, 273 insertions(+), 1 deletion(-) create mode 100644 src/main/java/org/codelibs/elasticsearch/df/content/json/JsonListContent.java diff --git a/README.md b/README.md index cc1255d..9d1b93b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Elasticsearch Data Format Plugin ## Overview Elasticsearch Data Format Plugin provides a feature to allow you to download a response of a search result as several formats other than JSON. -The supported formats are CSV, Excel and JSON(Bulk). +The supported formats are CSV, Excel, JSON(Bulk) and JSON(Object List). ## Version @@ -67,3 +67,11 @@ If not, it's as scan query(all data are stored.). | bulk.index | string | Index name in Bulk file | | bulk.type | string | Type name in Bulk file | +### JSON (Object List format) + + $ curl -o /tmp/data.json -XGET "localhost:9200/{index}/{type}/_data?format=jsonlist&source=..." 
+ +| Request Parameter | Type | Description | +| :---------------- | :----: | :----------------------------------------------------------- | +| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) | + diff --git a/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java b/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java index adfc2f1..8554dfb 100644 --- a/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java +++ b/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java @@ -2,6 +2,7 @@ import org.codelibs.elasticsearch.df.content.csv.CsvContent; import org.codelibs.elasticsearch.df.content.json.JsonContent; +import org.codelibs.elasticsearch.df.content.json.JsonListContent; import org.codelibs.elasticsearch.df.content.xls.XlsContent; import org.elasticsearch.client.Client; import org.elasticsearch.rest.RestRequest; @@ -95,6 +96,27 @@ public String fileName(final RestRequest request) { } return index + ".xlsx"; } + }, + JSONLIST(50) { + @Override + public String contentType() { + return "application/json"; + } + + @Override + public DataContent dataContent(final Client client, + final RestRequest request) { + return new JsonListContent(client, request, this); + } + + @Override + public String fileName(final RestRequest request) { + final String index = request.param("index"); + if (index == null) { + return "_all.json"; + } + return index + ".json"; + } }; private int index; diff --git a/src/main/java/org/codelibs/elasticsearch/df/content/json/JsonListContent.java b/src/main/java/org/codelibs/elasticsearch/df/content/json/JsonListContent.java new file mode 100644 index 0000000..b468fb7 --- /dev/null +++ b/src/main/java/org/codelibs/elasticsearch/df/content/json/JsonListContent.java @@ -0,0 +1,134 @@ +package org.codelibs.elasticsearch.df.content.json; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; 
+import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.codelibs.elasticsearch.df.content.ContentType; +import org.codelibs.elasticsearch.df.content.DataContent; +import org.codelibs.elasticsearch.df.util.RequestUtil; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; + +public class JsonListContent extends DataContent { + private static final Logger logger = LogManager.getLogger(JsonListContent.class); + + public JsonListContent(final Client client, final RestRequest request, final ContentType contentType) { + super(client, request, contentType); + } + + @Override + public void write(final File outputFile, final SearchResponse response, final RestChannel channel, + final ActionListener listener) { + try { + final OnLoadListener onLoadListener = new OnLoadListener( + outputFile, listener); + onLoadListener.onResponse(response); + } catch (final Exception e) { + listener.onFailure(new ElasticsearchException("Failed to write data.", + e)); + } + } + + protected class OnLoadListener implements ActionListener { + protected ActionListener listener; + + protected Writer writer; + + protected File outputFile; + + private long currentCount = 0; + + private boolean firstLine = true; + + protected OnLoadListener(final File outputFile, final ActionListener listener) { + this.outputFile = outputFile; + this.listener = listener; + try { + writer = new BufferedWriter(new OutputStreamWriter( + new 
FileOutputStream(outputFile), "UTF-8")); + } catch (final Exception e) { + throw new ElasticsearchException("Could not open " + + outputFile.getAbsolutePath(), e); + } + try { + writer.append('['); + }catch (final Exception e) { + onFailure(e); + } + } + + @Override + public void onResponse(final SearchResponse response) { + final String scrollId = response.getScrollId(); + final SearchHits hits = response.getHits(); + final int size = hits.getHits().length; + currentCount += size; + if (logger.isDebugEnabled()) { + logger.debug("scrollId: {}, totalHits: {}, hits: {}, current: {}", + scrollId, hits.getTotalHits(), size, currentCount); + } + try { + for (final SearchHit hit : hits) { + final String source = XContentHelper.convertToJson( + hit.getSourceRef(), true, false, XContentType.JSON); + if (!firstLine){ + writer.append(','); + }else{ + firstLine = false; + } + writer.append('\n').append(source); + } + + if (size == 0 || scrollId == null) { + // end + writer.append('\n').append(']'); + writer.flush(); + close(); + listener.onResponse(null); + } else { + client.prepareSearchScroll(scrollId) + .setScroll(RequestUtil.getScroll(request)) + .execute(this); + } + } catch (final Exception e) { + onFailure(e); + } + } + + @Override + public void onFailure(final Exception e) { + try { + close(); + } catch (final Exception e1) { + // ignore + } + listener.onFailure(new ElasticsearchException("Failed to write data.", + e)); + } + + private void close() { + if (writer != null) { + try { + writer.close(); + } catch (final IOException e) { + throw new ElasticsearchException("Could not close " + + outputFile.getAbsolutePath(), e); + } + } + } + } +} diff --git a/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java b/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java index 190c407..a19363a 100644 --- a/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java +++ b/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java 
@@ -129,6 +129,9 @@ private ContentType getContentType(final RestRequest request) { || "application/json".equals(contentType) || "json".equalsIgnoreCase(contentType)) { return ContentType.JSON; + } else if ("application/list+json".equals(contentType) + || "jsonlist".equals(contentType)) { + return ContentType.JSONLIST; } return null; diff --git a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java index 9debb16..c405011 100644 --- a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java +++ b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java @@ -52,11 +52,13 @@ public class DataFormatPluginTest { private static final File csvTempFile; private static final File xlsTempFile; private static final File jsonTempFile; + private static final File jsonListTempFile; private static final String path; private final Map paramsCsv = new HashMap<>(); private final Map paramsXls = new HashMap<>(); private final Map paramsJson = new HashMap<>(); + private final Map paramsJsonList = new HashMap<>(); static { docNumber = 20; @@ -64,6 +66,7 @@ public class DataFormatPluginTest { csvTempFile = createTempFile("csvtest", ".csv"); xlsTempFile = createTempFile("xlstest", ".xls"); jsonTempFile = createTempFile("jsontest", ".json"); + jsonListTempFile = createTempFile("jsonlisttest", ".json"); path = "/dataset0/_data"; } @@ -106,6 +109,7 @@ public void prepareParams() { paramsCsv.put("format", "csv"); paramsXls.put("format", "xls"); paramsJson.put("format", "json"); + paramsJsonList.put("format", "jsonlist"); } @After @@ -113,6 +117,7 @@ public void clearParams() { paramsCsv.clear(); paramsXls.clear(); paramsJson.clear(); + paramsJsonList.clear(); } @Test @@ -421,6 +426,106 @@ public void dumpJsonInFile() throws IOException { } } + @Test + public void dumpJsonList() throws IOException { + + // Download All as JSON + try (CurlResponse curlResponse = EcrCurl.get(node, 
"/dataset0/_data") + .header("Content-Type", "application/json") + .param("format", "jsonlist").execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals(docNumber + 2, lines.length); + assertTrue(lines[0].equals("[")); + assertTrue(lines[1].startsWith("{" + "\"aaa\":\"test")); + assertTrue(lines[docNumber + 1].equals("]")); + } + + final String query = "{\"query\":{\"bool\":{\"must\":[{\"range\":{\"bbb\":{\"from\":\"1\",\"to\":\"10\"}}}],\"must_not\":[],\"should\":[]}},\"sort\":[\"bbb\"]}"; + + // Download 10 docs as JSON with Query + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json") + .param("format", "jsonlist") + .param("search_type", "query_then_fetch").body(query) + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals(10 + 2, lines.length); + assertTrue(lines[0].startsWith("[")); + assertTrue(lines[1].startsWith("{" + "\"aaa\":\"test")); + } + + // Download 10 docs as JSON + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json").param("q", "*:*") + .param("format", "jsonlist").param("from", "5").execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals(15 + 2, lines.length); + } + + // Download all the docs from the 5th as JSON + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json").param("q", "*:*") + .param("format", "jsonlist").param("from", "5") + .param("size", String.valueOf(docNumber)).execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals((docNumber - 5) + 2, lines.length); + } + + final String queryWithFrom = 
"{\"query\":{\"match_all\":{}},\"from\":5,\"size\":" + String.valueOf(docNumber) + ",\"sort\":[\"bbb\"]}"; + + // Download All as JSON with Query and from + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json") + .param("format", "jsonlist").body(queryWithFrom).execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals((docNumber - 5) + 2, lines.length); + } + + // Download All as JSON with Query and from + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json") + .param("format", "jsonlist").param("source", queryWithFrom) + .param("source_content_type", "application/json") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals((docNumber - 5) + 2, lines.length); + } + + // Download All as JSON with search_type + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") + .header("Content-Type", "application/json") + .param("search_type", "query_then_fetch") + .param("format", "jsonlist").execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals(docNumber + 2, lines.length); + assertTrue(lines[0].equals("[")); + assertTrue(lines[1].startsWith("{" + "\"aaa\":\"test")); + assertTrue(lines[docNumber + 1].equals("]")); + } + } + + @Test + public void dumpJsonListInFile() throws IOException { + paramsJsonList.put("file", jsonListTempFile.getAbsolutePath()); + + try (CurlResponse curlResponse = createRequest(node, path, paramsJsonList).execute()) { + assertAcknowledged(curlResponse, jsonListTempFile); + final List lines = Files.readAllLines(jsonListTempFile.toPath(), Charsets.UTF_8); + assertEquals(docNumber + 2, lines.size()); + assertTrue(lines.get(0).equals("[")); + assertTrue(lines.get(1).startsWith("{" + 
"\"aaa\":\"test")); + assertTrue(lines.get(docNumber).startsWith("{" + "\"aaa\":\"test")); + assertTrue(lines.get(docNumber + 1).equals("]")); + } + } + @Test public void dumpSizeLimit() throws IOException { From 3dbd3da22217904b1fe43da3f19099ad2b29679c Mon Sep 17 00:00:00 2001 From: Thierry Rondet Date: Fri, 11 Dec 2020 02:41:08 +0100 Subject: [PATCH 2/6] add support for geojson output format --- README.md | 18 +- pom.xml | 5 + .../elasticsearch/df/content/ContentType.java | 22 ++ .../df/content/geojson/GeoJsonContent.java | 240 ++++++++++++++++++ .../elasticsearch/df/rest/RestDataAction.java | 4 + .../elasticsearch/df/util/JsonUtils.java | 96 +++++++ .../df/DataFormatPluginTest.java | 176 ++++++++++++- 7 files changed, 555 insertions(+), 6 deletions(-) create mode 100644 src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java create mode 100644 src/main/java/org/codelibs/elasticsearch/df/util/JsonUtils.java diff --git a/README.md b/README.md index 9d1b93b..d75d8a5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Elasticsearch Data Format Plugin ## Overview Elasticsearch Data Format Plugin provides a feature to allow you to download a response of a search result as several formats other than JSON. -The supported formats are CSV, Excel, JSON(Bulk) and JSON(Object List). +The supported formats are CSV, Excel, JSON(Bulk), JSON(Object List) and GeoJSON. ## Version @@ -75,3 +75,19 @@ If not, it's as scan query(all data are stored.). | :---------------- | :----: | :----------------------------------------------------------- | | source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) | +### GeoJSON (Open GIS standard) + + $ curl -o /tmp/data.json -XGET "localhost:9200/{index}/{type}/_data?format=geojson&source=..." 
+ +| Request Parameter | Type | Description | +| :----------------------- | :----: | :----------------------------------------------------------- | +| source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) | +| geometry.lon_field | string | Longitude field for coordinates (Support Geometry type "Point") | +| geometry.lat_field | string | Latitude field for coordinates (Support Geometry type "Point") | +| geometry.alt_field | string | Altitude field for coordinates (Support Geometry type "Point") | +| geometry.coord_field | string | Coordinates field. Support all Geometry types (see [GeoJSON Example](https://en.wikipedia.org/wiki/GeoJSON)).
If set, it overrides `geometry.lon_field`, `geometry.lat_field` and `geometry.alt_field` | +| geometry.type_field | string | Geometry type field (see [GeoJSON Example](https://en.wikipedia.org/wiki/GeoJSON))
Only used if `geometry.coord_field` param is set | +| keep_geometry_info | boolean | Keep or not the original geometry fields in final GeoJSON properties (default: false) | +| exclude_fields | string | Exclude fields in final geojson properties (comma separate format) | + +**NB**: Field name can use basic style like `a` or JSONpath style like `a.b.c[2].d` \ No newline at end of file diff --git a/pom.xml b/pom.xml index ccaac5d..c8aef8e 100644 --- a/pom.xml +++ b/pom.xml @@ -121,6 +121,11 @@ poi-ooxml-schemas ${poi.version} + + com.google.code.gson + gson + 2.8.6 + org.codelibs elasticsearch-cluster-runner diff --git a/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java b/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java index 8554dfb..bfb1306 100644 --- a/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java +++ b/src/main/java/org/codelibs/elasticsearch/df/content/ContentType.java @@ -1,6 +1,7 @@ package org.codelibs.elasticsearch.df.content; import org.codelibs.elasticsearch.df.content.csv.CsvContent; +import org.codelibs.elasticsearch.df.content.geojson.GeoJsonContent; import org.codelibs.elasticsearch.df.content.json.JsonContent; import org.codelibs.elasticsearch.df.content.json.JsonListContent; import org.codelibs.elasticsearch.df.content.xls.XlsContent; @@ -117,6 +118,27 @@ public String fileName(final RestRequest request) { } return index + ".json"; } + }, + GEOJSON(60) { + @Override + public String contentType() { + return "application/geo+json"; + } + + @Override + public DataContent dataContent(final Client client, + final RestRequest request) { + return new GeoJsonContent(client, request, this); + } + + @Override + public String fileName(final RestRequest request) { + final String index = request.param("index"); + if (index == null) { + return "_all.geojson"; + } + return index + ".geojson"; + } }; private int index; diff --git 
a/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java b/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java new file mode 100644 index 0000000..a503c6d --- /dev/null +++ b/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java @@ -0,0 +1,240 @@ +package org.codelibs.elasticsearch.df.content.geojson; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.codelibs.elasticsearch.df.content.ContentType; +import org.codelibs.elasticsearch.df.content.DataContent; +import org.codelibs.elasticsearch.df.util.JsonUtils; +import org.codelibs.elasticsearch.df.util.RequestUtil; +import org.codelibs.elasticsearch.df.util.StringUtils; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.rest.RestChannel; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; + +public class GeoJsonContent extends DataContent { + private static final Logger logger = LogManager.getLogger(GeoJsonContent.class); + + private final String geometryCoordinatesLonField; + private final String geometryCoordinatesLatField; + private final String 
geometryCoordinatesAltField; + private final String geometryTypeField; + private final String geometryCoordinatesField; + private final boolean geometryKeepGeoInfo; + private final List excludeFields; + + public GeoJsonContent(final Client client, final RestRequest request, final ContentType contentType) { + super(client, request, contentType); + + geometryCoordinatesLonField = request.param("geometry.lon_field",StringUtils.EMPTY_STRING); + geometryCoordinatesLatField = request.param("geometry.lat_field",StringUtils.EMPTY_STRING); + geometryCoordinatesAltField = request.param("geometry.alt_field",StringUtils.EMPTY_STRING); + geometryTypeField = request.param("geometry.type_field",StringUtils.EMPTY_STRING); + geometryCoordinatesField = request.param("geometry.coord_field",StringUtils.EMPTY_STRING); + geometryKeepGeoInfo = request.paramAsBoolean("keep_geometry_info",false); + + final String[] fields = request.paramAsStringArray("exclude_fields", StringUtils.EMPTY_STRINGS); + if (fields.length == 0) { + excludeFields = new ArrayList<>(); + } else { + final List fieldList = new ArrayList<>(); + for (final String field : fields) { + fieldList.add(field.trim()); + } + excludeFields = Collections.unmodifiableList(fieldList); + } + + if (logger.isDebugEnabled()) { + logger.debug("geometryTypeField: {}, geometryCoordinatesField: {}, geometryCoordinatesLonField: {}, " + + "geometryCoordinatesLatField: {}, geometryCoordinatesAltField: {}, geometryKeepGeoInfo: {}, excludeFields: {}", + geometryTypeField, geometryCoordinatesField, geometryCoordinatesLonField, + geometryCoordinatesLatField, geometryCoordinatesAltField, geometryKeepGeoInfo, excludeFields); + } + } + + @Override + public void write(final File outputFile, final SearchResponse response, final RestChannel channel, + final ActionListener listener) { + try { + final OnLoadListener onLoadListener = new OnLoadListener( + outputFile, listener); + onLoadListener.onResponse(response); + } catch (final Exception e) { + 
listener.onFailure(new ElasticsearchException("Failed to write data.", + e)); + } + } + + protected class OnLoadListener implements ActionListener { + protected ActionListener listener; + + protected Writer writer; + + protected File outputFile; + + private long currentCount = 0; + + private boolean firstLine = true; + + protected OnLoadListener(final File outputFile, final ActionListener listener) { + this.outputFile = outputFile; + this.listener = listener; + try { + writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(outputFile), "UTF-8")); + } catch (final Exception e) { + throw new ElasticsearchException("Could not open " + + outputFile.getAbsolutePath(), e); + } + try { + writer.append("{\"type\": \"FeatureCollection\", \"features\": ["); + }catch (final Exception e) { + onFailure(e); + } + } + + @Override + public void onResponse(final SearchResponse response) { + final Gson gsonWriter = new GsonBuilder().create(); + final String scrollId = response.getScrollId(); + final SearchHits hits = response.getHits(); + final int size = hits.getHits().length; + currentCount += size; + if (logger.isDebugEnabled()) { + logger.debug("scrollId: {}, totalHits: {}, hits: {}, current: {}", + scrollId, hits.getTotalHits(), size, currentCount); + } + try { + for (final SearchHit hit : hits) { + final String source = XContentHelper.convertToJson( + hit.getSourceRef(), true, false, XContentType.JSON); + if (!firstLine){ + writer.append(','); + }else{ + firstLine = false; + } + + final JsonElement propertiesJson = JsonParser.parseString(source); + String geometryType = ""; + + JsonArray geometryCoordinates = new JsonArray(); + if (!geometryCoordinatesField.isEmpty()){ + JsonElement jsonEltCoord = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesField); + if (jsonEltCoord !=null && !jsonEltCoord.isJsonNull()){ + geometryCoordinates = jsonEltCoord.getAsJsonArray​(); + if (!geometryKeepGeoInfo){ + 
JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesField); + } + } + if (!geometryTypeField.isEmpty()){ + JsonElement jsonEltType = JsonUtils.getJsonElement(propertiesJson,geometryTypeField); + if (jsonEltType !=null && !jsonEltType.isJsonNull()){ + geometryType = jsonEltType.getAsString(); + if (!geometryKeepGeoInfo){ + JsonUtils.removeJsonElement(propertiesJson,geometryTypeField); + } + } + } + }else{ + if (!geometryCoordinatesLonField.isEmpty() && !geometryCoordinatesLatField.isEmpty()){ + JsonElement jsonEltLon = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLonField); + JsonElement jsonEltLat = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLatField); + if (jsonEltLon !=null && !jsonEltLon.isJsonNull() && jsonEltLat !=null && !jsonEltLat.isJsonNull()){ + geometryCoordinates.add(jsonEltLon.getAsNumber()); + geometryCoordinates.add(jsonEltLat.getAsNumber()); + if (!geometryKeepGeoInfo) { + JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesLonField); + JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesLatField); + } + } + } + if (!geometryCoordinatesAltField.isEmpty()){ + JsonElement jsonElt = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesAltField); + if (jsonElt !=null && !jsonElt.isJsonNull()){ + geometryCoordinates.add(jsonElt.getAsNumber()); + if (!geometryKeepGeoInfo) { + JsonUtils.removeJsonElement(propertiesJson,geometryCoordinatesAltField); + } + } + } + geometryType = "Point"; + } + + for (String excludeField : excludeFields) { + JsonUtils.removeJsonElement(propertiesJson,excludeField); + } + + JsonObject geometryObject = new JsonObject(); + geometryObject.addProperty("type", geometryType); + geometryObject.add("coordinates", geometryCoordinates); + + JsonObject featureObject = new JsonObject(); + featureObject.addProperty("type", "Feature"); + featureObject.add("geometry", geometryObject); + featureObject.add("properties", propertiesJson.getAsJsonObject()); + + 
writer.append('\n').append(gsonWriter.toJson(featureObject)); + } + + if (size == 0 || scrollId == null) { + // end + writer.append('\n').append("]}"); + writer.flush(); + close(); + listener.onResponse(null); + } else { + client.prepareSearchScroll(scrollId) + .setScroll(RequestUtil.getScroll(request)) + .execute(this); + } + } catch (final Exception e) { + onFailure(e); + } + } + + @Override + public void onFailure(final Exception e) { + try { + close(); + } catch (final Exception e1) { + // ignore + } + listener.onFailure(new ElasticsearchException("Failed to write data.", + e)); + } + + private void close() { + if (writer != null) { + try { + writer.close(); + } catch (final IOException e) { + throw new ElasticsearchException("Could not close " + + outputFile.getAbsolutePath(), e); + } + } + } + } +} diff --git a/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java b/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java index a19363a..a5fc0fc 100644 --- a/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java +++ b/src/main/java/org/codelibs/elasticsearch/df/rest/RestDataAction.java @@ -132,6 +132,10 @@ private ContentType getContentType(final RestRequest request) { } else if ("application/list+json".equals(contentType) || "jsonlist".equals(contentType)) { return ContentType.JSONLIST; + } else if ("application/geo+json".equals(contentType) + || "application/geojson".equals(contentType) + || "geojson".equals(contentType)) { + return ContentType.GEOJSON; } return null; diff --git a/src/main/java/org/codelibs/elasticsearch/df/util/JsonUtils.java b/src/main/java/org/codelibs/elasticsearch/df/util/JsonUtils.java new file mode 100644 index 0000000..7a1d9a3 --- /dev/null +++ b/src/main/java/org/codelibs/elasticsearch/df/util/JsonUtils.java @@ -0,0 +1,96 @@ +package org.codelibs.elasticsearch.df.util; + +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import 
com.google.gson.JsonNull; + +public class JsonUtils { + private JsonUtils() { + } + + /** + * Returns a JSON sub-element from the given JsonElement and the given path + * + * @param json - a Gson JsonElement + * @param path - a JSON path, e.g. a.b.c[2].d + * @return - a sub-element of json according to the given path + */ + public static JsonElement getJsonElement(JsonElement json, String path){ + + String[] parts = path.split("\\.|\\[|\\]"); + JsonElement result = json; + + for (String key : parts) { + + key = key.trim(); + if (key.isEmpty()) + continue; + + if (result == null){ + result = JsonNull.INSTANCE; + break; + } + + if (result.isJsonObject()){ + result = result.getAsJsonObject().get(key); + } + else if (result.isJsonArray()){ + int ix = Integer.valueOf(key); + result = (ix < result.getAsJsonArray().size())?result.getAsJsonArray().get(ix):null; + } + else{ + break; + } + } + + return result; + } + + /** + * Returns a removed JSON sub-element from the given JsonElement and the given path + * + * @param json - a Gson JsonElement + * @param path - a JSON path, e.g. 
a.b.c[2].d + * @return - a removed sub-element of json according to the given path + */ + public static JsonElement removeJsonElement(JsonElement json, String path){ + + String[] parts = path.split("\\.|\\[|\\]"); + JsonElement result = json; + + for (int i = 0; i < parts.length; i++) { + + String key = parts[i].trim(); + if (key.isEmpty()) + continue; + + if (result == null){ + result = JsonNull.INSTANCE; + break; + } + + boolean lastPart = (i == parts.length-1); + if (result.isJsonObject()){ + if (lastPart){ + result = result.getAsJsonObject().remove(key); + }else{ + result = result.getAsJsonObject().get(key); + } + } + else if (result.isJsonArray()){ + int ix = Integer.valueOf(key); + if (lastPart){ + result = result.getAsJsonArray().remove(ix); + }else{ + result = result.getAsJsonArray().get(ix); + } + } + else{ + break; + } + } + + return result; + } +} diff --git a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java index c405011..fdca579 100644 --- a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java +++ b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java @@ -13,6 +13,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import org.apache.commons.codec.Charsets; import org.apache.poi.hssf.usermodel.HSSFRow; @@ -53,12 +54,15 @@ public class DataFormatPluginTest { private static final File xlsTempFile; private static final File jsonTempFile; private static final File jsonListTempFile; + private static final File geojsonTempFile; private static final String path; + private static final String pathgeo; private final Map paramsCsv = new HashMap<>(); private final Map paramsXls = new HashMap<>(); private final Map paramsJson = new HashMap<>(); private final Map paramsJsonList = new HashMap<>(); + private final Map paramsGeoJson = new HashMap<>(); static { docNumber = 20; @@ -67,7 +71,9 @@ 
public class DataFormatPluginTest { xlsTempFile = createTempFile("xlstest", ".xls"); jsonTempFile = createTempFile("jsontest", ".json"); jsonListTempFile = createTempFile("jsonlisttest", ".json"); + geojsonTempFile = createTempFile("geojsontest", ".geojson"); path = "/dataset0/_data"; + pathgeo = "/dataset1/_data"; } @BeforeClass @@ -110,6 +116,7 @@ public void prepareParams() { paramsXls.put("format", "xls"); paramsJson.put("format", "json"); paramsJsonList.put("format", "jsonlist"); + paramsGeoJson.put("format", "geojson"); } @After @@ -118,6 +125,7 @@ public void clearParams() { paramsXls.clear(); paramsJson.clear(); paramsJsonList.clear(); + paramsGeoJson.clear(); } @Test @@ -507,7 +515,7 @@ public void dumpJsonList() throws IOException { assertEquals(docNumber + 2, lines.length); assertTrue(lines[0].equals("[")); assertTrue(lines[1].startsWith("{" + "\"aaa\":\"test")); - assertTrue(lines[docNumber + 1].equals("]")); + assertTrue(lines[docNumber + 1].equals("]")); } } @@ -526,6 +534,125 @@ public void dumpJsonListInFile() throws IOException { } } + @Test + public void dumpGeoJson() throws IOException { + + // default call + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson").execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertEquals(docNumber + 2, lines.length); + assertTrue(lines[0].equals("{\"type\": \"FeatureCollection\", \"features\": [")); + assertTrue(lines[docNumber + 1].equals("]}")); + } + + // normal call with lon_field" and "lat_field" + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.lon_field", "x_lon") + .param("geometry.lat_field", "x_lat") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + 
assertTrue(!lines[1].contains("\"x_lon\":")); + assertTrue(!lines[1].contains("\"x_lat\":")); + assertTrue(lines[1].matches("(.+)\"geometry\":\\{\"type\":\"Point\",\"coordinates\":\\[[0-9\\.\\-]+,[0-9\\.\\-]+\\](.+)")); + } + + // normal call with lon_field", "lat_field" and "x_alt" but without field cleaning + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.lon_field", "x_lon") + .param("geometry.lat_field", "x_lat") + .param("geometry.alt_field", "x_alt") + .param("keep_geometry_info", "true") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertTrue(lines[1].contains("\"x_lon\":")); + assertTrue(lines[1].contains("\"x_lat\":")); + assertTrue(lines[1].contains("\"x_alt\":")); + assertTrue(lines[1].matches("(.+)\"geometry\":\\{\"type\":\"Point\",\"coordinates\":\\[[0-9\\.\\-]+,[0-9\\.\\-]+,[0-9\\.\\-]+\\](.+)")); + } + + // Look for "geometry.alt_field" value in the iii sub-object and exclude unnecessary fields from final properties + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.lon_field", "x_lon") + .param("geometry.lat_field", "x_lat") + .param("geometry.alt_field", "iii.x_altSub") + .param("exclude_fields", "iii,x_alt,x_coord,x_type,x_typeArray") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + System.out.println(content); + assertTrue(!lines[1].contains("\"x_lon\":")); + assertTrue(!lines[1].contains("\"x_lat\":")); + assertTrue(!lines[1].contains("\"iii\":{\"jjj\":\"static test\"}")); + assertTrue(!lines[1].contains("\"x_alt\":")); + assertTrue(lines[1].matches("(.+)\"geometry\":\\{\"type\":\"Point\",\"coordinates\":\\[[0-9\\.\\-]+,[0-9\\.\\-]+,[0-9\\.\\-]+\\](.+)")); + } + + 
// normal call with "type_field" and "coord_field" + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.type_field", "x_type") + .param("geometry.coord_field", "x_coord") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertTrue(!lines[1].contains("\"x_coord\":[")); + assertTrue(!lines[1].contains("\"x_type\":")); + assertTrue(lines[1].matches("(.+)\"coordinates\":\\[[0-9,\\.\\-\\[\\]]+\\](.+)")); + } + + // Bad "geometry.coord_field" value + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.type_field", "x_type") + .param("geometry.coord_field", "x_coords") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertTrue(lines[1].contains("\"coordinates\":[]")); + assertTrue(lines[1].contains("\"x_coord\":[")); + } + + // Look for "geometry.type_field" value in array at index 1 + try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") + .header("Content-Type", "application/json") + .param("format", "geojson") + .param("geometry.type_field", "x_typeArray[1]") + .param("geometry.coord_field", "x_coord") + .execute()) { + final String content = curlResponse.getContentAsString(); + final String[] lines = content.split("\n"); + assertTrue(lines[1].contains("\"x_typeArray\":[\"badtype\",\"badtype\"]")); + } + } + + @Test + public void dumpGeoJsonInFile() throws IOException { + paramsGeoJson.put("file", geojsonTempFile.getAbsolutePath()); + + try (CurlResponse curlResponse = createRequest(node, pathgeo, paramsGeoJson).execute()) { + assertAcknowledged(curlResponse, geojsonTempFile); + final List lines = Files.readAllLines(geojsonTempFile.toPath(), Charsets.UTF_8); + assertEquals(docNumber 
+ 2, lines.size()); + assertTrue(lines.get(0).equals("{\"type\": \"FeatureCollection\", \"features\": [")); + assertTrue(lines.get(1).startsWith("{\"type\":\"Feature\",\"geometry\":{\"type\":\"")); + assertTrue(lines.get(docNumber).startsWith("{\"type\":\"Feature\",\"geometry\":{\"type\":\"")); + assertTrue(lines.get(docNumber + 1).equals("]}")); + } + } + @Test public void dumpSizeLimit() throws IOException { @@ -558,15 +685,18 @@ public void dumpSizeLimit() throws IOException { private static void indexing() { final String index0 = "dataset0"; final String type0 = "_doc"; + final String index1 = "dataset1"; + final String type1 = "_doc"; // create an index runner.createIndex(index0, (Settings) null); + runner.createIndex(index1, (Settings) null); - if (!runner.indexExists(index0)) { + if (!runner.indexExists(index0) || !runner.indexExists(index1)) { Assert.fail(); } - // create documents + // create documents for index0 for (int i = 1; i <= docNumber; i++) { final IndexResponse indexResponse0 = runner.insert(index0, type0, String.valueOf(i), "{" + @@ -577,11 +707,47 @@ private static void indexing() { "}"); assertEquals(DocWriteResponse.Result.CREATED, indexResponse0.getResult()); } + // create documents for index1 + final String[] geotypeList = { "Point", "LineString", "Polygon" }; + for (int i = 1; i <= docNumber; i++) { + String geotype = geotypeList[new Random().nextInt(geotypeList.length)]; + String geocoord = ""; + switch (geotype) { + case "Point": + geocoord= "[102.0, 0.5]"; + break; + case "LineString": + geocoord= "[[102.0, 0.0], [103.0, 1.0], [104.0, 0.0], [105.0, 1.0]]"; + break; + case "Polygon": + geocoord= "[[[100.0, 0.0], [101.0, 0.0], [101.0, 1.0],[100.0, 1.0], [100.0, 0.0]]]"; + break; + } + + final IndexResponse indexResponse1 = runner.insert(index1, type1, String.valueOf(i), + "{" + + "\"aaa\":\"test " + i + "\"," + + "\"bbb\":" + i + "," + + "\"ccc\":\"2012-01-01:00:00.000Z\"," + + "\"eee\":{\"fff\":\"TEST " + i + "\", \"ggg\":" + i + ", 
\"hhh\":\"2013-01-01:00:00.000Z\"}," + + "\"x_type\":\"" + geotype + "\"," + + "\"x_typeArray\": [\"badtype\",\"" + geotype + "\",\"badtype\"]," + + "\"x_coord\": " + geocoord + "," + + "\"x_lon\": 1" + i + ".0," + + "\"x_lat\": " + (i/2) + ".0," + + "\"x_alt\": " + (i/2) + ".0," + + "\"iii\":{\"x_altSub\": "+ (i*3) + "}" + + "}"); + assertEquals(DocWriteResponse.Result.CREATED, indexResponse1.getResult()); + } + // refresh elastic cluster runner.refresh(); // search documents to verify - SearchResponse searchResponse = runner.search(index0, type0, null, null, 0, 10); - assertEquals(docNumber, searchResponse.getHits().getTotalHits().value); + SearchResponse searchResponse0 = runner.search(index0, type0, null, null, 0, 10); + SearchResponse searchResponse1 = runner.search(index1, type1, null, null, 0, 10); + assertEquals(docNumber, searchResponse0.getHits().getTotalHits().value); + assertEquals(docNumber, searchResponse1.getHits().getTotalHits().value); } private static File createTempFile(String prefix, String suffix) { From 975bbf35cb5cb933eb60be8508ba69efdd2b7d6e Mon Sep 17 00:00:00 2001 From: Thierry Rondet Date: Fri, 11 Dec 2020 02:41:48 +0100 Subject: [PATCH 3/6] update some missing parameters in README --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d75d8a5..c33f00b 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ If not, it's as scan query(all data are stored.). 
 | Request Parameter | Type | Description |
 |:------------------|:-------:|:------------|
 | append.header | boolean | Append column headers if true |
-| fields_name | string | choose the fields to dump |
+| fields_name | string | choose the fields to dump (comma-separated format) |
 | source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |
 | csv.separator | string | Separate character in CSV |
 | csv.quote | string | Quote character in CSV|
@@ -46,7 +46,7 @@ If not, it's as scan query(all data are stored.).
 | Request Parameter | Type | Description |
 |:------------------|:-------:|:------------|
 | append.header | boolean | Append column headers if true |
-| fields_name | string | choose the fields to dump |
+| fields_name | string | choose the fields to dump (comma-separated format) |
 | source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |
 
 ### Excel 2007
 
@@ -55,6 +55,8 @@ If not, it's as scan query(all data are stored.).
 | Request Parameter | Type | Description |
 |:------------------|:-------:|:------------|
+| append.header | boolean | Append column headers if true |
+| fields_name | string | choose the fields to dump (comma-separated format) |
 | source | string | [Query DSL](http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl.html) |
 
 ### JSON (Elasticsearch Bulk format)

From 9497f1a63a7b223342b7d7b32346e4c77583cd6e Mon Sep 17 00:00:00 2001
From: Thierry Rondet
Date: Fri, 11 Dec 2020 04:12:06 +0100
Subject: [PATCH 4/6] remove dirty debug message

---
 .../java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java
index fdca579..3b244d4 100644
--- a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java
+++ b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java
@@ -590,7 +590,6 @@ public void dumpGeoJson() throws IOException {
                 .execute()) {
             final String content = curlResponse.getContentAsString();
             final String[] lines = content.split("\n");
-            System.out.println(content);
             assertTrue(!lines[1].contains("\"x_lon\":"));
             assertTrue(!lines[1].contains("\"x_lat\":"));
             assertTrue(!lines[1].contains("\"iii\":{\"jjj\":\"static test\"}"));

From 087bebb61de93afe160c49c4ea032c66e29ce446 Mon Sep 17 00:00:00 2001
From: Thierry Rondet
Date: Fri, 11 Dec 2020 16:26:19 +0100
Subject: [PATCH 5/6] fix bug with plugin security policy

---
 .../df/content/geojson/GeoJsonContent.java | 22 ++++++++++++-------
 .../plugin-metadata/plugin-security.policy | 1 +
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java b/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java
index a503c6d..7b66dce 100644
--- 
a/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java
+++ b/src/main/java/org/codelibs/elasticsearch/df/content/geojson/GeoJsonContent.java
@@ -6,6 +6,8 @@
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -118,7 +120,9 @@ protected OnLoadListener(final File outputFile, final ActionListener liste
         @Override
         public void onResponse(final SearchResponse response) {
-            final Gson gsonWriter = new GsonBuilder().create();
+            final Gson gsonWriter = AccessController.doPrivileged((PrivilegedAction) () -> {
+                return new GsonBuilder().create();
+            });
             final String scrollId = response.getScrollId();
             final SearchHits hits = response.getHits();
             final int size = hits.getHits().length;
@@ -137,12 +141,14 @@ public void onResponse(final SearchResponse response) {
                     firstLine = false;
                 }
-                final JsonElement propertiesJson = JsonParser.parseString(source);
+                final JsonElement propertiesJson = AccessController.doPrivileged((PrivilegedAction) () -> {
+                    return JsonParser.parseString(source);
+                });
                 String geometryType = "";
                 JsonArray geometryCoordinates = new JsonArray();
                 if (!geometryCoordinatesField.isEmpty()){
-                    JsonElement jsonEltCoord = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesField);
+                    final JsonElement jsonEltCoord = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesField);
                     if (jsonEltCoord !=null && !jsonEltCoord.isJsonNull()){
                         geometryCoordinates = jsonEltCoord.getAsJsonArray();
                         if (!geometryKeepGeoInfo){
@@ -150,7 +156,7 @@ public void onResponse(final SearchResponse response) {
                     }
                 }
                 if (!geometryTypeField.isEmpty()){
-                    JsonElement jsonEltType = JsonUtils.getJsonElement(propertiesJson,geometryTypeField);
+                    final JsonElement jsonEltType = JsonUtils.getJsonElement(propertiesJson,geometryTypeField);
                     if (jsonEltType !=null && 
!jsonEltType.isJsonNull()){ geometryType = jsonEltType.getAsString(); if (!geometryKeepGeoInfo){ @@ -160,8 +166,8 @@ public void onResponse(final SearchResponse response) { } }else{ if (!geometryCoordinatesLonField.isEmpty() && !geometryCoordinatesLatField.isEmpty()){ - JsonElement jsonEltLon = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLonField); - JsonElement jsonEltLat = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLatField); + final JsonElement jsonEltLon = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLonField); + final JsonElement jsonEltLat = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesLatField); if (jsonEltLon !=null && !jsonEltLon.isJsonNull() && jsonEltLat !=null && !jsonEltLat.isJsonNull()){ geometryCoordinates.add(jsonEltLon.getAsNumber()); geometryCoordinates.add(jsonEltLat.getAsNumber()); @@ -172,7 +178,7 @@ public void onResponse(final SearchResponse response) { } } if (!geometryCoordinatesAltField.isEmpty()){ - JsonElement jsonElt = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesAltField); + final JsonElement jsonElt = JsonUtils.getJsonElement(propertiesJson,geometryCoordinatesAltField); if (jsonElt !=null && !jsonElt.isJsonNull()){ geometryCoordinates.add(jsonElt.getAsNumber()); if (!geometryKeepGeoInfo) { @@ -186,7 +192,7 @@ public void onResponse(final SearchResponse response) { for (String excludeField : excludeFields) { JsonUtils.removeJsonElement(propertiesJson,excludeField); } - + JsonObject geometryObject = new JsonObject(); geometryObject.addProperty("type", geometryType); geometryObject.add("coordinates", geometryCoordinates); diff --git a/src/main/plugin-metadata/plugin-security.policy b/src/main/plugin-metadata/plugin-security.policy index 615a15b..d9ed2fe 100644 --- a/src/main/plugin-metadata/plugin-security.policy +++ b/src/main/plugin-metadata/plugin-security.policy @@ -1,3 +1,4 @@ grant { permission java.lang.RuntimePermission "getClassLoader"; + permission 
java.lang.RuntimePermission "accessDeclaredMembers"; }; From 62100f1df607cdade1df3fd04f9daafe61cf1f47 Mon Sep 17 00:00:00 2001 From: Thierry Rondet Date: Fri, 11 Dec 2020 17:21:28 +0100 Subject: [PATCH 6/6] update tests to correctly support variable docNumber --- .../df/DataFormatPluginTest.java | 74 ++++++++++++------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java index 3b244d4..1a995c5 100644 --- a/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java +++ b/src/test/java/org/codelibs/elasticsearch/df/DataFormatPluginTest.java @@ -65,6 +65,8 @@ public class DataFormatPluginTest { private final Map paramsGeoJson = new HashMap<>(); static { + // Doc number used for test + // -> max 9990 because elastic query size limited below 10000 docNumber = 20; csvTempFile = createTempFile("csvtest", ".csv"); @@ -168,14 +170,15 @@ public void dumpCsvWithQuery() throws IOException { assertLineContains(lines[0], "\"aaa\"", "\"bbb\"", "\"ccc\"", "\"eee.fff\"", "\"eee.ggg\""); assertLineContains(lines[1], "\"1\""); } - + // Download 10 docs as CSV clearParams(); prepareParams(); paramsCsv.put("q", "*:*"); paramsCsv.put("from", "5"); - try (CurlResponse response = createRequest(node, path, paramsCsv).execute()) { - assertEquals(16, response.getContentAsString().split("\n").length); + try (CurlResponse response = createRequest(node, path, paramsCsv) + .param("size", Integer.toString(docNumber)).execute()) { + assertEquals(docNumber - 4, response.getContentAsString().split("\n").length); } // Download all the docs from the 5th as CSV @@ -193,7 +196,9 @@ public void dumpCsvWithQuery() throws IOException { // Download All as CSV with Query and from try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "csv").body(queryWithFrom).execute()) 
{ + .param("format", "csv") + .param("size", Integer.toString(docNumber)) + .body(queryWithFrom).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber - 10 + 1, lines.length); @@ -204,6 +209,7 @@ public void dumpCsvWithQuery() throws IOException { .header("Content-Type", "application/json") .param("format", "csv").param("source", queryWithFrom) .param("source_content_type", "application/json") + .param("size", String.valueOf(docNumber)) .execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); @@ -214,6 +220,7 @@ public void dumpCsvWithQuery() throws IOException { try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") .param("search_type", "query_then_fetch").param("format", "csv") + .param("size", String.valueOf(docNumber)) .execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); @@ -299,7 +306,9 @@ public void dumpExcel() throws IOException { // Download All as Excel with search_type try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("search_type", "query_then_fetch").param("format", "xls") + .param("search_type", "query_then_fetch") + .param("format", "xls") + .param("size", Integer.toString(docNumber)) .execute()) { try (InputStream is = curlResponse.getContentAsStream()) { final POIFSFileSystem fs = new POIFSFileSystem(is); @@ -331,7 +340,8 @@ public void dumpJson() throws IOException { // Download All as JSON try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "json").execute()) { + .param("format", "json") + .param("size", Integer.toString(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = 
content.split("\n"); assertEquals(docNumber * 2, lines.length); @@ -343,6 +353,7 @@ public void dumpJson() throws IOException { try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") .param("format", "json").param("bulk.index", "dataset02") + .param("size", Integer.toString(docNumber)) .execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); @@ -366,15 +377,6 @@ public void dumpJson() throws IOException { assertTrue(lines[1].startsWith("{\"aaa\":\"test 1\",")); } - // Download 10 docs as JSON - try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") - .header("Content-Type", "application/json").param("q", "*:*") - .param("format", "json").param("from", "5").execute()) { - final String content = curlResponse.getContentAsString(); - final String[] lines = content.split("\n"); - assertEquals(30, lines.length); - } - // Download all the docs from the 5th as JSON try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json").param("q", "*:*") @@ -390,7 +392,9 @@ public void dumpJson() throws IOException { // Download All as JSON with Query and from try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "json").body(queryWithFrom).execute()) { + .param("format", "json") + .param("size", String.valueOf(docNumber)) + .body(queryWithFrom).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals((docNumber - 5) * 2, lines.length); @@ -401,6 +405,7 @@ public void dumpJson() throws IOException { .header("Content-Type", "application/json") .param("format", "json").param("source", queryWithFrom) .param("source_content_type", "application/json") + .param("size", String.valueOf(docNumber)) .execute()) { final String content = 
curlResponse.getContentAsString(); final String[] lines = content.split("\n"); @@ -411,7 +416,8 @@ public void dumpJson() throws IOException { try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") .param("search_type", "query_then_fetch") - .param("format", "json").execute()) { + .param("format", "json") + .param("size", String.valueOf(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber * 2, lines.length); @@ -440,7 +446,7 @@ public void dumpJsonList() throws IOException { // Download All as JSON try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "jsonlist").execute()) { + .param("format", "jsonlist").param("size", Integer.toString(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber + 2, lines.length); @@ -467,10 +473,11 @@ public void dumpJsonList() throws IOException { // Download 10 docs as JSON try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json").param("q", "*:*") - .param("format", "jsonlist").param("from", "5").execute()) { + .param("format", "jsonlist").param("from", "5") + .param("size", Integer.toString(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); - assertEquals(15 + 2, lines.length); + assertEquals(docNumber - 5 + 2, lines.length); } // Download all the docs from the 5th as JSON @@ -488,7 +495,9 @@ public void dumpJsonList() throws IOException { // Download All as JSON with Query and from try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "jsonlist").body(queryWithFrom).execute()) { + .param("format", 
"jsonlist") + .param("size", String.valueOf(docNumber)) + .body(queryWithFrom).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals((docNumber - 5) + 2, lines.length); @@ -499,7 +508,7 @@ public void dumpJsonList() throws IOException { .header("Content-Type", "application/json") .param("format", "jsonlist").param("source", queryWithFrom) .param("source_content_type", "application/json") - .execute()) { + .param("size", String.valueOf(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals((docNumber - 5) + 2, lines.length); @@ -509,7 +518,8 @@ public void dumpJsonList() throws IOException { try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") .param("search_type", "query_then_fetch") - .param("format", "jsonlist").execute()) { + .param("format", "jsonlist") + .param("size", String.valueOf(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber + 2, lines.length); @@ -540,7 +550,7 @@ public void dumpGeoJson() throws IOException { // default call try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset1/_data") .header("Content-Type", "application/json") - .param("format", "geojson").execute()) { + .param("format", "geojson").param("size", Integer.toString(docNumber)).execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber + 2, lines.length); @@ -658,7 +668,9 @@ public void dumpSizeLimit() throws IOException { // Default try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "csv").execute()) { + .param("format", "csv") + .param("size", Integer.toString(docNumber)) + .execute()) { final String 
content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber + 1, lines.length); @@ -667,7 +679,10 @@ public void dumpSizeLimit() throws IOException { // 50% try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "csv").param("limit", "50%").execute()) { + .param("format", "csv") + .param("size", Integer.toString(docNumber)) + .param("limit", "50%") + .execute()) { final String content = curlResponse.getContentAsString(); final String[] lines = content.split("\n"); assertEquals(docNumber + 1, lines.length); @@ -676,7 +691,10 @@ public void dumpSizeLimit() throws IOException { //0% try (CurlResponse curlResponse = EcrCurl.get(node, "/dataset0/_data") .header("Content-Type", "application/json") - .param("format", "csv").param("limit", "0").execute()) { + .param("format", "csv") + .param("size", Integer.toString(docNumber)) + .param("limit", "0") + .execute()) { assertEquals(500, curlResponse.getHttpStatusCode()); } } @@ -762,7 +780,7 @@ private static File createTempFile(String prefix, String suffix) { } private CurlRequest createRequest(Node node, String path, Map params) { - CurlRequest request = EcrCurl.get(node, path).header("Content-Type", "application/json"); + CurlRequest request = EcrCurl.get(node, path).header("Content-Type", "application/json").param("size", Integer.toString(docNumber)); for (final Map.Entry entry : params.entrySet()) { request.param(entry.getKey(), entry.getValue()); }