Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release-notes/9331-extract-bounding-box.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
An attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled.
2 changes: 2 additions & 0 deletions doc/sphinx-guides/source/user/dataset-management.rst
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,8 @@ For NetCDF and HDF5 files, an attempt will be made to extract metadata in NcML_

.. _NcML: https://docs.unidata.ucar.edu/netcdf-java/current/userguide/ncml_overview.html

In addition, an attempt will be made to extract a geospatial bounding box (west, south, east, north) from NetCDF and HDF5 files and then insert these values into the geospatial metadata block, if enabled.

Compressed Files
----------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ public class DatasetFieldConstant implements java.io.Serializable {
public final static String geographicUnit="geographicUnit";
public final static String westLongitude="westLongitude";
public final static String eastLongitude="eastLongitude";
public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB
public final static String southLatitude="southLongitude"; //Incorrect in DB
public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB: https://github.com/IQSS/dataverse/issues/5645
public final static String southLatitude="southLongitude"; //Incorrect in DB: https://github.com/IQSS/dataverse/issues/5645
public final static String unitOfAnalysis="unitOfAnalysis";
public final static String universe="universe";
public final static String kindOfData="kindOfData";
Expand Down
184 changes: 153 additions & 31 deletions src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.netcdf;

import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor;
import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest;
import edu.harvard.iq.dataverse.ingest.metadataextraction.spi.FileMetadataExtractorSpi;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import ucar.ma2.DataType;
import ucar.nc2.Attribute;
import ucar.nc2.NetcdfFile;
import ucar.nc2.NetcdfFiles;

public class NetcdfFileMetadataExtractor extends FileMetadataExtractor {

private static final Logger logger = Logger.getLogger(NetcdfFileMetadataExtractor.class.getCanonicalName());

public static final String WEST_LONGITUDE_KEY = "geospatial_lon_min";
public static final String EAST_LONGITUDE_KEY = "geospatial_lon_max";
public static final String NORTH_LATITUDE_KEY = "geospatial_lat_max";
public static final String SOUTH_LATITUDE_KEY = "geospatial_lat_min";

private static final String GEOSPATIAL_BLOCK_NAME = "geospatial";
private static final String WEST_LONGITUDE = DatasetFieldConstant.westLongitude;
private static final String EAST_LONGITUDE = DatasetFieldConstant.eastLongitude;
private static final String NORTH_LATITUDE = DatasetFieldConstant.northLatitude;
private static final String SOUTH_LATITUDE = DatasetFieldConstant.southLatitude;

public NetcdfFileMetadataExtractor(FileMetadataExtractorSpi originatingProvider) {
super(originatingProvider);
}

public NetcdfFileMetadataExtractor() {
super(null);
}

@Override
public FileMetadataIngest ingest(BufferedInputStream stream) throws IOException {
throw new UnsupportedOperationException("Not supported yet.");
}

public FileMetadataIngest ingestFile(File file) throws IOException {
FileMetadataIngest fileMetadataIngest = new FileMetadataIngest();
fileMetadataIngest.setMetadataBlockName(GEOSPATIAL_BLOCK_NAME);

Map<String, String> geoFields = parseGeospatial(getNetcdfFile(file));
WestAndEastLongitude welong = getStandardLongitude(new WestAndEastLongitude(geoFields.get(WEST_LONGITUDE), geoFields.get(EAST_LONGITUDE)));
String westLongitudeFinal = welong != null ? welong.getWestLongitude() : null;
String eastLongitudeFinal = welong != null ? welong.getEastLongitude() : null;
String northLatitudeFinal = geoFields.get(NORTH_LATITUDE);
String southLatitudeFinal = geoFields.get(SOUTH_LATITUDE);

logger.info(getLineStringsUrl(westLongitudeFinal, southLatitudeFinal, eastLongitudeFinal, northLatitudeFinal));

Map<String, Set<String>> metadataMap = new HashMap<>();
metadataMap.put(WEST_LONGITUDE, new HashSet<>());
metadataMap.get(WEST_LONGITUDE).add(westLongitudeFinal);
metadataMap.put(EAST_LONGITUDE, new HashSet<>());
metadataMap.get(EAST_LONGITUDE).add(eastLongitudeFinal);
metadataMap.put(NORTH_LATITUDE, new HashSet<>());
metadataMap.get(NORTH_LATITUDE).add(northLatitudeFinal);
metadataMap.put(SOUTH_LATITUDE, new HashSet<>());
metadataMap.get(SOUTH_LATITUDE).add(southLatitudeFinal);
fileMetadataIngest.setMetadataMap(metadataMap);
return fileMetadataIngest;
}

public NetcdfFile getNetcdfFile(File file) throws IOException {
/**
* <attribute name="geospatial_lat_min" value="25.066666666666666" />
* south
* <attribute name="geospatial_lat_max" value="49.40000000000000" />
* north
* <attribute name="geospatial_lon_min" value="-124.7666666333333" />
* west
* <attribute name="geospatial_lon_max" value="-67.058333300000015" />
* east
* <attribute name="geospatial_lon_resolution" value="0.041666666666666" />
* <attribute name="geospatial_lat_resolution" value="0.041666666666666" />
* <attribute name="geospatial_lat_units" value="decimal_degrees north" />
* <attribute name="geospatial_lon_units" value="decimal_degrees east" />
*/
return NetcdfFiles.open(file.getAbsolutePath());
}

private Map<String, String> parseGeospatial(NetcdfFile netcdfFile) {
Map<String, String> geoFields = new HashMap<>();

Attribute westLongitude = netcdfFile.findGlobalAttribute(WEST_LONGITUDE_KEY);
Attribute eastLongitude = netcdfFile.findGlobalAttribute(EAST_LONGITUDE_KEY);
Attribute northLatitude = netcdfFile.findGlobalAttribute(NORTH_LATITUDE_KEY);
Attribute southLatitude = netcdfFile.findGlobalAttribute(SOUTH_LATITUDE_KEY);

geoFields.put(DatasetFieldConstant.westLongitude, getValue(westLongitude));
geoFields.put(DatasetFieldConstant.eastLongitude, getValue(eastLongitude));
geoFields.put(DatasetFieldConstant.northLatitude, getValue(northLatitude));
geoFields.put(DatasetFieldConstant.southLatitude, getValue(southLatitude));

logger.info(getLineStringsUrl(
geoFields.get(DatasetFieldConstant.westLongitude),
geoFields.get(DatasetFieldConstant.southLatitude),
geoFields.get(DatasetFieldConstant.eastLongitude),
geoFields.get(DatasetFieldConstant.northLatitude)));

return geoFields;
}

// We store strings in the database.
private String getValue(Attribute attribute) {
if (attribute == null) {
return null;
}
DataType dataType = attribute.getDataType();
if (dataType.isString()) {
return attribute.getStringValue();
} else if (dataType.isNumeric()) {
return attribute.getNumericValue().toString();
} else {
return null;
}
}

// Convert to standard -180 to 180 range by subtracting 360
// if either longitude is greater than 180.
// west south east north
// 343.68, 41.8, 353.78, 49.62 becomes
// -16.320007, 41.8, -6.220001, 49.62 instead
// "If one of them is > 180, the domain is 0:360.
// If one of them is <0, the domain is -180:180.
// If both are between 0 and 180, the answer is indeterminate."
// https://github.com/cf-convention/cf-conventions/issues/435#issuecomment-1505614364
public WestAndEastLongitude getStandardLongitude(WestAndEastLongitude westAndEastLongitude) {
if (westAndEastLongitude == null) {
return null;
}
if (westAndEastLongitude.getWestLongitude() == null || westAndEastLongitude.getEastLongitude() == null) {
return null;
}
float eastAsFloat;
float westAsFloat;
try {
westAsFloat = Float.valueOf(westAndEastLongitude.getWestLongitude());
eastAsFloat = Float.valueOf(westAndEastLongitude.getEastLongitude());
} catch (NumberFormatException ex) {
return null;
}
if (westAsFloat > 180 || eastAsFloat > 180) {
Float westStandard = westAsFloat - 360;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that if we're converting 0-360 to -180-180 then we need to subtract just 180 from the given value, not 360.

Copy link
Copy Markdown
Contributor

@JR-1991 JR-1991 May 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sekmiller I was also puzzled with this at the beginning, but subtracting 360° is the correct procedure. The math only tells half the truth, because the prime meridian (0° longitude) needs to stay at Greenwich and thus the interval's "cycle point" where both ends meet changes by 180°. Thus, this has to be attributed for, which leads to the 360°.

That's how I understood it so far, but this illustration may explain it better:

image

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JR-1991 thanks! Even with that explanation, it's hard to keep straight in my head.

I was relying on a tool to check that maps are the same. That is these two URLs (minus 360) show the same map:

Here's a screenshot of the maps (and a bit more commentary) I originally posted at https://dataverse.zulipchat.com/#narrow/stream/376593-geospatial/topic/extract.20lat.2Flong.20.239331/near/348542658

Screen Shot 2023-05-03 at 9 20 22 AM

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JR-1991 OK. Looking at the globe and the example location makes sense when the value is between 180 and 360, but what the code does here is if the value for either is over 180 it subtracts 360 from both so there is a potential for a situation where one val is > 180 and the other is less and in that case, say 170, and subtracting 360 results in -190 which is off the scale below -180.

Copy link
Copy Markdown
Contributor

@JR-1991 JR-1991 May 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sekmiller thanks for the review! Indeed subtracting from both would lead to a failure. I tested it visually and in the case of $\theta_1 = [280°, 170°]$ the result should translate to $\theta_2 = [-80°, 170°]$ - Hence if a degree is under 180 it should not be subtracted. I have updated the illustration:

image

I understand it in this way, that within this domain transformation only one half of the circle is changing. Note, in 0° - 360° domain the first half is the same as in the domain of (-180°) - 180° and thus there is a 1:1 relation if degrees are within 0-180. Yet, the other half does change to another range in a more drastic way. Tipping over 180° by a degree suddenly leads you to -179° which in the other domain corresponds to 181° - Hence the 360° provides a valid transformation for this case.

It's quite complicated though 😅

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I should add to the figure, that $f(\theta)$ is defined for $\theta \in [0°, 360°]$ and if $\theta &lt; 0°$ its already in the desired domain.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to address all this in 690b8a4 just now and left this comment about it: #9541 (comment)

Maybe I should have resolve other conversation and gotten it down to just this one. Oh well. Please see the other comment.

Copy link
Copy Markdown
Member

@atrisovic atrisovic May 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read through the thread and agree with the comments! Instead of the lines 154-182, we can check each coordinate by itself and it would work well, ie:

Float westStandard = westAsFloat;
Float eastStandard = eastAsFloat;

 if (westAsFloat > 180) {
             westStandard = westAsFloat - 360;
}

if (eastAsFloat > 180){
             eastStandard = eastAsFloat - 360;
}

WestAndEastLongitude updatedWeLong = new WestAndEastLongitude(westStandard.toString(), eastStandard.toString());
return updatedWeLong;

Float eastStandard = eastAsFloat - 360;
WestAndEastLongitude updatedWeLong = new WestAndEastLongitude(westStandard.toString(), eastStandard.toString());
return updatedWeLong;
}
return westAndEastLongitude;
}

// Generates a handy link to see what the bounding box looks like on a map
private String getLineStringsUrl(String west, String south, String east, String north) {
// BBOX (Left (LON) ,Bottom (LAT), Right (LON), Top (LAT), comma separated, with or without decimal point):
return "https://linestrings.com/bbox/#" + west + "," + south + "," + east + "," + north;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.netcdf;

import java.util.Objects;

public class WestAndEastLongitude {

private final String westLongitude;
private final String eastLongitude;

public WestAndEastLongitude(String westLongitude, String eastLongitude) {
this.westLongitude = westLongitude;
this.eastLongitude = eastLongitude;
}

public String getWestLongitude() {
return westLongitude;
}

public String getEastLongitude() {
return eastLongitude;
}

@Override
public String toString() {
return "WestAndEastLongitude{" + "westLongitude=" + westLongitude + ", eastLongitude=" + eastLongitude + '}';
}

@Override
public int hashCode() {
int hash = 3;
return hash;
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final WestAndEastLongitude other = (WestAndEastLongitude) obj;
if (!Objects.equals(this.westLongitude, other.westLongitude)) {
return false;
}
return Objects.equals(this.eastLongitude, other.eastLongitude);
}

}
78 changes: 78 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/FitsIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package edu.harvard.iq.dataverse.api;

import com.jayway.restassured.RestAssured;
import static com.jayway.restassured.path.json.JsonPath.with;
import com.jayway.restassured.response.Response;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import javax.json.Json;
import javax.json.JsonObject;
import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Test;

public class FitsIT {

@BeforeClass
public static void setUp() {
RestAssured.baseURI = UtilIT.getRestAssuredBaseUri();
}

@Test
public void testAstroFieldsFromFits() throws IOException {
Response createUser = UtilIT.createRandomUser();
createUser.then().assertThat().statusCode(OK.getStatusCode());
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
String username = UtilIT.getUsernameFromResponse(createUser);

Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
createDataverseResponse.prettyPrint();
createDataverseResponse.then().assertThat()
.statusCode(CREATED.getStatusCode());

String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken);
setMetadataBlocks.prettyPrint();
setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode());

Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
createDataset.prettyPrint();
createDataset.then().assertThat()
.statusCode(CREATED.getStatusCode());

Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset);

// "FOS 2 x 2064 primary array spectrum containing the flux and wavelength arrays, plus a small table extension"
// from https://fits.gsfc.nasa.gov/fits_samples.html
String pathToFile = "src/test/resources/fits/FOSy19g0309t_c2f.fits";

Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
uploadFile.prettyPrint();
uploadFile.then().assertThat().statusCode(OK.getStatusCode());

Response getJson = UtilIT.nativeGet(datasetId, apiToken);
getJson.prettyPrint();
getJson.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.latestVersion.metadataBlocks.astrophysics.fields[0].value[0]", equalTo("Image"));

// a bit more precise than the check for "Image" above (but annoyingly fiddly)
List<JsonObject> astroTypeFromNativeGet = with(getJson.body().asString()).param("astroType", "astroType")
.getJsonObject("data.latestVersion.metadataBlocks.astrophysics.fields.findAll { fields -> fields.typeName == astroType }");
Map firstAstroTypeFromNativeGet = astroTypeFromNativeGet.get(0);
assertTrue(firstAstroTypeFromNativeGet.toString().contains("Image"));

List<JsonObject> coverageTemportalFromNativeGet = with(getJson.body().asString()).param("coverageTemporal", "coverage.Temporal")
.getJsonObject("data.latestVersion.metadataBlocks.astrophysics.fields.findAll { fields -> fields.typeName == coverageTemporal }");
Map firstcoverageTemporalFromNativeGet = coverageTemportalFromNativeGet.get(0);
assertTrue(firstcoverageTemporalFromNativeGet.toString().contains("1993"));

}

}
45 changes: 45 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.json.Json;
import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.FORBIDDEN;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
Expand Down Expand Up @@ -179,4 +180,48 @@ public void testNmclFromNetcdfErrorChecking() throws IOException {

}

@Test
public void testExtraBoundingBoxFromNetcdf() throws IOException {
Response createUser = UtilIT.createRandomUser();
createUser.then().assertThat().statusCode(OK.getStatusCode());
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
String username = UtilIT.getUsernameFromResponse(createUser);

Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
createDataverseResponse.prettyPrint();
createDataverseResponse.then().assertThat()
.statusCode(CREATED.getStatusCode());

String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);

Response setMetadataBlocks = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("geospatial"), apiToken);
setMetadataBlocks.prettyPrint();
setMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode());

Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
createDataset.prettyPrint();
createDataset.then().assertThat()
.statusCode(CREATED.getStatusCode());

Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset);

// From https://www.ncei.noaa.gov/data/international-comprehensive-ocean-atmosphere/v3/archive/nrt/ICOADS_R3.0.0_1662-10.nc
// via https://data.noaa.gov/onestop/collections/details/9bd5c743-0684-4e70-817a-ed977117f80c?f=temporalResolution:1%20Minute%20-%20%3C%201%20Hour;dataFormats:NETCDF
String pathToFile = "src/test/resources/netcdf/ICOADS_R3.0.0_1662-10.nc";

Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
uploadFile.prettyPrint();
uploadFile.then().assertThat().statusCode(OK.getStatusCode());

Response getJson = UtilIT.nativeGet(datasetId, apiToken);
getJson.prettyPrint();
getJson.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].westLongitude.value", equalTo("-16.320007"))
.body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].eastLongitude.value", equalTo("-6.220001"))
.body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].northLongitude.value", equalTo("49.62"))
.body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].southLongitude.value", equalTo("41.8"));
}

}
Loading