Skip to content

Commit 85bde07

Browse files
authored
Merge pull request #9541 from IQSS/9331-extract-bounding-box2
extract bounding box from NetCDF/HDF5 files and insert into the geospatial metadata block
2 parents f02ad47 + 85d4bf2 commit 85bde07

12 files changed

Lines changed: 672 additions & 34 deletions

File tree

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
An attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled.

doc/sphinx-guides/source/user/dataset-management.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,34 @@ A map will be shown as a preview of GeoJSON files when the previewer has been en
346346
NetCDF and HDF5
347347
---------------
348348

349+
NcML
350+
~~~~
351+
349352
For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_ (XML) format and save it as an auxiliary file. (See also :doc:`/developers/aux-file-support` in the Developer Guide.) A previewer for these NcML files is available (see :ref:`file-previews`).
350353

351354
.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html
352355

356+
Geospatial Bounding Box
357+
~~~~~~~~~~~~~~~~~~~~~~~
358+
359+
An attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled.
360+
361+
This is the mapping that is used:
362+
363+
- geospatial_lon_min: West Longitude
364+
- geospatial_lon_max: East Longitude
365+
- geospatial_lat_max: North Latitude
366+
- geospatial_lat_min: South Latitude
367+
368+
Please note the following rules regarding these fields:
369+
370+
- West Longitude and East Longitude are expected to be in the range of -180 and 180. (When using :ref:`geospatial-search`, you should use this range for longitude.)
371+
- If West Longitude and East Longitude are both over 180 (outside the expected -180:180 range), 360 will be subtracted to shift the values from the 0:360 range to the expected -180:180 range.
372+
- If either West Longitude or East Longitude is less than zero but the other longitude is greater than 180 (which would imply an indeterminate domain, because it is unclear whether the domain is -180:180 or 0:360), metadata will not be extracted.
373+
- If the bounding box was successfully populated, the subsequent removal of the NetCDF or HDF5 file from the dataset does not automatically remove the bounding box from the dataset metadata. You must remove the bounding box manually, if desired.
374+
375+
If the bounding box was successfully populated, :ref:`geospatial-search` should be able to find it.
376+
353377
Compressed Files
354378
----------------
355379

src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ public class DatasetFieldConstant implements java.io.Serializable {
112112
public final static String geographicUnit="geographicUnit";
113113
public final static String westLongitude="westLongitude";
114114
public final static String eastLongitude="eastLongitude";
115-
public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB
116-
public final static String southLatitude="southLongitude"; //Incorrect in DB
115+
public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB: https://github.com/IQSS/dataverse/issues/5645
116+
public final static String southLatitude="southLongitude"; //Incorrect in DB: https://github.com/IQSS/dataverse/issues/5645
117117
public final static String unitOfAnalysis="unitOfAnalysis";
118118
public final static String universe="universe";
119119
public final static String kindOfData="kindOfData";

src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java

Lines changed: 153 additions & 31 deletions
Large diffs are not rendered by default.
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.netcdf;
2+
3+
import edu.harvard.iq.dataverse.DatasetFieldConstant;
4+
import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor;
5+
import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest;
6+
import edu.harvard.iq.dataverse.ingest.metadataextraction.spi.FileMetadataExtractorSpi;
7+
import java.io.BufferedInputStream;
8+
import java.io.File;
9+
import java.io.IOException;
10+
import java.util.HashMap;
11+
import java.util.HashSet;
12+
import java.util.Map;
13+
import java.util.Set;
14+
import java.util.logging.Logger;
15+
import ucar.ma2.DataType;
16+
import ucar.nc2.Attribute;
17+
import ucar.nc2.NetcdfFile;
18+
import ucar.nc2.NetcdfFiles;
19+
20+
public class NetcdfFileMetadataExtractor extends FileMetadataExtractor {
21+
22+
private static final Logger logger = Logger.getLogger(NetcdfFileMetadataExtractor.class.getCanonicalName());
23+
24+
public static final String WEST_LONGITUDE_KEY = "geospatial_lon_min";
25+
public static final String EAST_LONGITUDE_KEY = "geospatial_lon_max";
26+
public static final String NORTH_LATITUDE_KEY = "geospatial_lat_max";
27+
public static final String SOUTH_LATITUDE_KEY = "geospatial_lat_min";
28+
29+
private static final String GEOSPATIAL_BLOCK_NAME = "geospatial";
30+
private static final String WEST_LONGITUDE = DatasetFieldConstant.westLongitude;
31+
private static final String EAST_LONGITUDE = DatasetFieldConstant.eastLongitude;
32+
private static final String NORTH_LATITUDE = DatasetFieldConstant.northLatitude;
33+
private static final String SOUTH_LATITUDE = DatasetFieldConstant.southLatitude;
34+
35+
public NetcdfFileMetadataExtractor(FileMetadataExtractorSpi originatingProvider) {
36+
super(originatingProvider);
37+
}
38+
39+
public NetcdfFileMetadataExtractor() {
40+
super(null);
41+
}
42+
43+
@Override
44+
public FileMetadataIngest ingest(BufferedInputStream stream) throws IOException {
45+
throw new UnsupportedOperationException("Not supported yet.");
46+
}
47+
48+
public FileMetadataIngest ingestFile(File file) throws IOException {
49+
FileMetadataIngest fileMetadataIngest = new FileMetadataIngest();
50+
fileMetadataIngest.setMetadataBlockName(GEOSPATIAL_BLOCK_NAME);
51+
52+
Map<String, String> geoFields = parseGeospatial(getNetcdfFile(file));
53+
WestAndEastLongitude welong = getStandardLongitude(new WestAndEastLongitude(geoFields.get(WEST_LONGITUDE), geoFields.get(EAST_LONGITUDE)));
54+
String westLongitudeFinal = welong != null ? welong.getWestLongitude() : null;
55+
String eastLongitudeFinal = welong != null ? welong.getEastLongitude() : null;
56+
String northLatitudeFinal = geoFields.get(NORTH_LATITUDE);
57+
String southLatitudeFinal = geoFields.get(SOUTH_LATITUDE);
58+
59+
logger.info(getLineStringsUrl(westLongitudeFinal, southLatitudeFinal, eastLongitudeFinal, northLatitudeFinal));
60+
61+
Map<String, Set<String>> metadataMap = new HashMap<>();
62+
metadataMap.put(WEST_LONGITUDE, new HashSet<>());
63+
metadataMap.get(WEST_LONGITUDE).add(westLongitudeFinal);
64+
metadataMap.put(EAST_LONGITUDE, new HashSet<>());
65+
metadataMap.get(EAST_LONGITUDE).add(eastLongitudeFinal);
66+
metadataMap.put(NORTH_LATITUDE, new HashSet<>());
67+
metadataMap.get(NORTH_LATITUDE).add(northLatitudeFinal);
68+
metadataMap.put(SOUTH_LATITUDE, new HashSet<>());
69+
metadataMap.get(SOUTH_LATITUDE).add(southLatitudeFinal);
70+
fileMetadataIngest.setMetadataMap(metadataMap);
71+
return fileMetadataIngest;
72+
}
73+
74+
public NetcdfFile getNetcdfFile(File file) throws IOException {
75+
/**
76+
* <attribute name="geospatial_lat_min" value="25.066666666666666" />
77+
* south
78+
* <attribute name="geospatial_lat_max" value="49.40000000000000" />
79+
* north
80+
* <attribute name="geospatial_lon_min" value="-124.7666666333333" />
81+
* west
82+
* <attribute name="geospatial_lon_max" value="-67.058333300000015" />
83+
* east
84+
* <attribute name="geospatial_lon_resolution" value="0.041666666666666" />
85+
* <attribute name="geospatial_lat_resolution" value="0.041666666666666" />
86+
* <attribute name="geospatial_lat_units" value="decimal_degrees north" />
87+
* <attribute name="geospatial_lon_units" value="decimal_degrees east" />
88+
*/
89+
return NetcdfFiles.open(file.getAbsolutePath());
90+
}
91+
92+
private Map<String, String> parseGeospatial(NetcdfFile netcdfFile) {
93+
Map<String, String> geoFields = new HashMap<>();
94+
95+
Attribute westLongitude = netcdfFile.findGlobalAttribute(WEST_LONGITUDE_KEY);
96+
Attribute eastLongitude = netcdfFile.findGlobalAttribute(EAST_LONGITUDE_KEY);
97+
Attribute northLatitude = netcdfFile.findGlobalAttribute(NORTH_LATITUDE_KEY);
98+
Attribute southLatitude = netcdfFile.findGlobalAttribute(SOUTH_LATITUDE_KEY);
99+
100+
geoFields.put(DatasetFieldConstant.westLongitude, getValue(westLongitude));
101+
geoFields.put(DatasetFieldConstant.eastLongitude, getValue(eastLongitude));
102+
geoFields.put(DatasetFieldConstant.northLatitude, getValue(northLatitude));
103+
geoFields.put(DatasetFieldConstant.southLatitude, getValue(southLatitude));
104+
105+
logger.info(getLineStringsUrl(
106+
geoFields.get(DatasetFieldConstant.westLongitude),
107+
geoFields.get(DatasetFieldConstant.southLatitude),
108+
geoFields.get(DatasetFieldConstant.eastLongitude),
109+
geoFields.get(DatasetFieldConstant.northLatitude)));
110+
111+
return geoFields;
112+
}
113+
114+
// We store strings in the database.
115+
private String getValue(Attribute attribute) {
116+
if (attribute == null) {
117+
return null;
118+
}
119+
DataType dataType = attribute.getDataType();
120+
if (dataType.isString()) {
121+
return attribute.getStringValue();
122+
} else if (dataType.isNumeric()) {
123+
return attribute.getNumericValue().toString();
124+
} else {
125+
return null;
126+
}
127+
}
128+
129+
// Convert to standard -180 to 180 range by subtracting 360
130+
// if both longitudea are greater than 180. For example:
131+
// west south east north
132+
// 343.68, 41.8, 353.78, 49.62 becomes
133+
// -16.320007, 41.8, -6.220001, 49.62 instead
134+
// "If one of them is > 180, the domain is 0:360.
135+
// If one of them is <0, the domain is -180:180.
136+
// If both are between 0 and 180, the answer is indeterminate."
137+
// https://github.com/cf-convention/cf-conventions/issues/435#issuecomment-1505614364
138+
// Solr only wants -180 to 180. It will throw an error for values outside this range.
139+
public WestAndEastLongitude getStandardLongitude(WestAndEastLongitude westAndEastLongitude) {
140+
if (westAndEastLongitude == null) {
141+
return null;
142+
}
143+
if (westAndEastLongitude.getWestLongitude() == null || westAndEastLongitude.getEastLongitude() == null) {
144+
return null;
145+
}
146+
float eastAsFloat;
147+
float westAsFloat;
148+
try {
149+
westAsFloat = Float.valueOf(westAndEastLongitude.getWestLongitude());
150+
eastAsFloat = Float.valueOf(westAndEastLongitude.getEastLongitude());
151+
} catch (NumberFormatException ex) {
152+
return null;
153+
}
154+
// "If one of them is > 180, the domain is 0:360"
155+
if (westAsFloat > 180 && eastAsFloat > 180) {
156+
Float westStandard = westAsFloat - 360;
157+
Float eastStandard = eastAsFloat - 360;
158+
WestAndEastLongitude updatedWeLong = new WestAndEastLongitude(westStandard.toString(), eastStandard.toString());
159+
return updatedWeLong;
160+
}
161+
// "If one of them is <0, the domain is -180:180."
162+
// 180:180 is what Solr wants. Return it.
163+
if (westAsFloat < 0 || eastAsFloat < 0) {
164+
// BUT! Don't return it if the values
165+
// are so low to be out of range!
166+
// Something must be wrong with the data.
167+
if (westAsFloat < -180 || eastAsFloat < -180) {
168+
return null;
169+
}
170+
if (westAsFloat > 180 || eastAsFloat > 180) {
171+
// Not in the proper range of -80:180
172+
return null;
173+
}
174+
return westAndEastLongitude;
175+
}
176+
if ((westAsFloat > 180 || eastAsFloat > 180) && (westAsFloat < 180 || eastAsFloat < 180)) {
177+
// One value is over 180 and the other is under 180.
178+
// We don't know if we should subtract 360 or not.
179+
// Return null to prevent inserting a potentially
180+
// incorrect bounding box.
181+
return null;
182+
}
183+
return westAndEastLongitude;
184+
}
185+
186+
// Generates a handy link to see what the bounding box looks like on a map
187+
private String getLineStringsUrl(String west, String south, String east, String north) {
188+
// BBOX (Left (LON) ,Bottom (LAT), Right (LON), Top (LAT), comma separated, with or without decimal point):
189+
return "https://linestrings.com/bbox/#" + west + "," + south + "," + east + "," + north;
190+
}
191+
192+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.netcdf;
2+
3+
import java.util.Objects;
4+
5+
public class WestAndEastLongitude {
6+
7+
private final String westLongitude;
8+
private final String eastLongitude;
9+
10+
public WestAndEastLongitude(String westLongitude, String eastLongitude) {
11+
this.westLongitude = westLongitude;
12+
this.eastLongitude = eastLongitude;
13+
}
14+
15+
public String getWestLongitude() {
16+
return westLongitude;
17+
}
18+
19+
public String getEastLongitude() {
20+
return eastLongitude;
21+
}
22+
23+
@Override
24+
public String toString() {
25+
return "WestAndEastLongitude{" + "westLongitude=" + westLongitude + ", eastLongitude=" + eastLongitude + '}';
26+
}
27+
28+
@Override
29+
public int hashCode() {
30+
int hash = 3;
31+
return hash;
32+
}
33+
34+
@Override
35+
public boolean equals(Object obj) {
36+
if (this == obj) {
37+
return true;
38+
}
39+
if (obj == null) {
40+
return false;
41+
}
42+
if (getClass() != obj.getClass()) {
43+
return false;
44+
}
45+
final WestAndEastLongitude other = (WestAndEastLongitude) obj;
46+
if (!Objects.equals(this.westLongitude, other.westLongitude)) {
47+
return false;
48+
}
49+
return Objects.equals(this.eastLongitude, other.eastLongitude);
50+
}
51+
52+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package edu.harvard.iq.dataverse.api;
2+
3+
import com.jayway.restassured.RestAssured;
4+
import static com.jayway.restassured.path.json.JsonPath.with;
5+
import com.jayway.restassured.response.Response;
6+
import java.io.IOException;
7+
import java.util.List;
8+
import java.util.Map;
9+
import javax.json.Json;
10+
import javax.json.JsonObject;
11+
import static javax.ws.rs.core.Response.Status.CREATED;
12+
import static javax.ws.rs.core.Response.Status.OK;
13+
import static org.hamcrest.CoreMatchers.equalTo;
14+
import static org.junit.Assert.assertTrue;
15+
import org.junit.BeforeClass;
16+
import org.junit.Test;
17+
18+
public class FitsIT {
19+
20+
@BeforeClass
21+
public static void setUp() {
22+
RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
23+
}
24+
25+
@Test
26+
public void testAstroFieldsFromFits() throws IOException {
27+
Response createUser = UtilIT.createRandomUser();
28+
createUser.then().assertThat().statusCode(OK.getStatusCode());
29+
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
30+
String username = UtilIT.getUsernameFromResponse(createUser);
31+
32+
Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
33+
createDataverseResponse.prettyPrint();
34+
createDataverseResponse.then().assertThat()
35+
.statusCode(CREATED.getStatusCode());
36+
37+
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
38+
39+
Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken);
40+
setMetadataBlocks.prettyPrint();
41+
setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode());
42+
43+
Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
44+
createDataset.prettyPrint();
45+
createDataset.then().assertThat()
46+
.statusCode(CREATED.getStatusCode());
47+
48+
Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
49+
String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset);
50+
51+
// "FOS 2 x 2064 primary array spectrum containing the flux and wavelength arrays, plus a small table extension"
52+
// from https://fits.gsfc.nasa.gov/fits_samples.html
53+
String pathToFile = "src/test/resources/fits/FOSy19g0309t_c2f.fits";
54+
55+
Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
56+
uploadFile.prettyPrint();
57+
uploadFile.then().assertThat().statusCode(OK.getStatusCode());
58+
59+
Response getJson = UtilIT.nativeGet(datasetId, apiToken);
60+
getJson.prettyPrint();
61+
getJson.then().assertThat()
62+
.statusCode(OK.getStatusCode())
63+
.body("data.latestVersion.metadataBlocks.astrophysics.fields[0].value[0]", equalTo("Image"));
64+
65+
// a bit more precise than the check for "Image" above (but annoyingly fiddly)
66+
List<JsonObject> astroTypeFromNativeGet = with(getJson.body().asString()).param("astroType", "astroType")
67+
.getJsonObject("data.latestVersion.metadataBlocks.astrophysics.fields.findAll { fields -> fields.typeName == astroType }");
68+
Map firstAstroTypeFromNativeGet = astroTypeFromNativeGet.get(0);
69+
assertTrue(firstAstroTypeFromNativeGet.toString().contains("Image"));
70+
71+
List<JsonObject> coverageTemportalFromNativeGet = with(getJson.body().asString()).param("coverageTemporal", "coverage.Temporal")
72+
.getJsonObject("data.latestVersion.metadataBlocks.astrophysics.fields.findAll { fields -> fields.typeName == coverageTemporal }");
73+
Map firstcoverageTemporalFromNativeGet = coverageTemportalFromNativeGet.get(0);
74+
assertTrue(firstcoverageTemporalFromNativeGet.toString().contains("1993"));
75+
76+
}
77+
78+
}

0 commit comments

Comments
 (0)