|
| 1 | +package edu.harvard.iq.dataverse.ingest.metadataextraction.impl.plugins.netcdf; |
| 2 | + |
| 3 | +import edu.harvard.iq.dataverse.DatasetFieldConstant; |
| 4 | +import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor; |
| 5 | +import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataIngest; |
| 6 | +import edu.harvard.iq.dataverse.ingest.metadataextraction.spi.FileMetadataExtractorSpi; |
| 7 | +import java.io.BufferedInputStream; |
| 8 | +import java.io.File; |
| 9 | +import java.io.IOException; |
| 10 | +import java.util.HashMap; |
| 11 | +import java.util.HashSet; |
| 12 | +import java.util.Map; |
| 13 | +import java.util.Set; |
| 14 | +import java.util.logging.Logger; |
| 15 | +import ucar.ma2.DataType; |
| 16 | +import ucar.nc2.Attribute; |
| 17 | +import ucar.nc2.NetcdfFile; |
| 18 | +import ucar.nc2.NetcdfFiles; |
| 19 | + |
| 20 | +public class NetcdfFileMetadataExtractor extends FileMetadataExtractor { |
| 21 | + |
| 22 | + private static final Logger logger = Logger.getLogger(NetcdfFileMetadataExtractor.class.getCanonicalName()); |
| 23 | + |
| 24 | + public static final String WEST_LONGITUDE_KEY = "geospatial_lon_min"; |
| 25 | + public static final String EAST_LONGITUDE_KEY = "geospatial_lon_max"; |
| 26 | + public static final String NORTH_LATITUDE_KEY = "geospatial_lat_max"; |
| 27 | + public static final String SOUTH_LATITUDE_KEY = "geospatial_lat_min"; |
| 28 | + |
| 29 | + private static final String GEOSPATIAL_BLOCK_NAME = "geospatial"; |
| 30 | + private static final String WEST_LONGITUDE = DatasetFieldConstant.westLongitude; |
| 31 | + private static final String EAST_LONGITUDE = DatasetFieldConstant.eastLongitude; |
| 32 | + private static final String NORTH_LATITUDE = DatasetFieldConstant.northLatitude; |
| 33 | + private static final String SOUTH_LATITUDE = DatasetFieldConstant.southLatitude; |
| 34 | + |
| 35 | + public NetcdfFileMetadataExtractor(FileMetadataExtractorSpi originatingProvider) { |
| 36 | + super(originatingProvider); |
| 37 | + } |
| 38 | + |
| 39 | + public NetcdfFileMetadataExtractor() { |
| 40 | + super(null); |
| 41 | + } |
| 42 | + |
| 43 | + @Override |
| 44 | + public FileMetadataIngest ingest(BufferedInputStream stream) throws IOException { |
| 45 | + throw new UnsupportedOperationException("Not supported yet."); |
| 46 | + } |
| 47 | + |
| 48 | + public FileMetadataIngest ingestFile(File file) throws IOException { |
| 49 | + FileMetadataIngest fileMetadataIngest = new FileMetadataIngest(); |
| 50 | + fileMetadataIngest.setMetadataBlockName(GEOSPATIAL_BLOCK_NAME); |
| 51 | + |
| 52 | + Map<String, String> geoFields = parseGeospatial(getNetcdfFile(file)); |
| 53 | + WestAndEastLongitude welong = getStandardLongitude(new WestAndEastLongitude(geoFields.get(WEST_LONGITUDE), geoFields.get(EAST_LONGITUDE))); |
| 54 | + String westLongitudeFinal = welong != null ? welong.getWestLongitude() : null; |
| 55 | + String eastLongitudeFinal = welong != null ? welong.getEastLongitude() : null; |
| 56 | + String northLatitudeFinal = geoFields.get(NORTH_LATITUDE); |
| 57 | + String southLatitudeFinal = geoFields.get(SOUTH_LATITUDE); |
| 58 | + |
| 59 | + logger.info(getLineStringsUrl(westLongitudeFinal, southLatitudeFinal, eastLongitudeFinal, northLatitudeFinal)); |
| 60 | + |
| 61 | + Map<String, Set<String>> metadataMap = new HashMap<>(); |
| 62 | + metadataMap.put(WEST_LONGITUDE, new HashSet<>()); |
| 63 | + metadataMap.get(WEST_LONGITUDE).add(westLongitudeFinal); |
| 64 | + metadataMap.put(EAST_LONGITUDE, new HashSet<>()); |
| 65 | + metadataMap.get(EAST_LONGITUDE).add(eastLongitudeFinal); |
| 66 | + metadataMap.put(NORTH_LATITUDE, new HashSet<>()); |
| 67 | + metadataMap.get(NORTH_LATITUDE).add(northLatitudeFinal); |
| 68 | + metadataMap.put(SOUTH_LATITUDE, new HashSet<>()); |
| 69 | + metadataMap.get(SOUTH_LATITUDE).add(southLatitudeFinal); |
| 70 | + fileMetadataIngest.setMetadataMap(metadataMap); |
| 71 | + return fileMetadataIngest; |
| 72 | + } |
| 73 | + |
| 74 | + public NetcdfFile getNetcdfFile(File file) throws IOException { |
| 75 | + /** |
| 76 | + * <attribute name="geospatial_lat_min" value="25.066666666666666" /> |
| 77 | + * south |
| 78 | + * <attribute name="geospatial_lat_max" value="49.40000000000000" /> |
| 79 | + * north |
| 80 | + * <attribute name="geospatial_lon_min" value="-124.7666666333333" /> |
| 81 | + * west |
| 82 | + * <attribute name="geospatial_lon_max" value="-67.058333300000015" /> |
| 83 | + * east |
| 84 | + * <attribute name="geospatial_lon_resolution" value="0.041666666666666" /> |
| 85 | + * <attribute name="geospatial_lat_resolution" value="0.041666666666666" /> |
| 86 | + * <attribute name="geospatial_lat_units" value="decimal_degrees north" /> |
| 87 | + * <attribute name="geospatial_lon_units" value="decimal_degrees east" /> |
| 88 | + */ |
| 89 | + return NetcdfFiles.open(file.getAbsolutePath()); |
| 90 | + } |
| 91 | + |
| 92 | + private Map<String, String> parseGeospatial(NetcdfFile netcdfFile) { |
| 93 | + Map<String, String> geoFields = new HashMap<>(); |
| 94 | + |
| 95 | + Attribute westLongitude = netcdfFile.findGlobalAttribute(WEST_LONGITUDE_KEY); |
| 96 | + Attribute eastLongitude = netcdfFile.findGlobalAttribute(EAST_LONGITUDE_KEY); |
| 97 | + Attribute northLatitude = netcdfFile.findGlobalAttribute(NORTH_LATITUDE_KEY); |
| 98 | + Attribute southLatitude = netcdfFile.findGlobalAttribute(SOUTH_LATITUDE_KEY); |
| 99 | + |
| 100 | + geoFields.put(DatasetFieldConstant.westLongitude, getValue(westLongitude)); |
| 101 | + geoFields.put(DatasetFieldConstant.eastLongitude, getValue(eastLongitude)); |
| 102 | + geoFields.put(DatasetFieldConstant.northLatitude, getValue(northLatitude)); |
| 103 | + geoFields.put(DatasetFieldConstant.southLatitude, getValue(southLatitude)); |
| 104 | + |
| 105 | + logger.info(getLineStringsUrl( |
| 106 | + geoFields.get(DatasetFieldConstant.westLongitude), |
| 107 | + geoFields.get(DatasetFieldConstant.southLatitude), |
| 108 | + geoFields.get(DatasetFieldConstant.eastLongitude), |
| 109 | + geoFields.get(DatasetFieldConstant.northLatitude))); |
| 110 | + |
| 111 | + return geoFields; |
| 112 | + } |
| 113 | + |
| 114 | + // We store strings in the database. |
| 115 | + private String getValue(Attribute attribute) { |
| 116 | + if (attribute == null) { |
| 117 | + return null; |
| 118 | + } |
| 119 | + DataType dataType = attribute.getDataType(); |
| 120 | + if (dataType.isString()) { |
| 121 | + return attribute.getStringValue(); |
| 122 | + } else if (dataType.isNumeric()) { |
| 123 | + return attribute.getNumericValue().toString(); |
| 124 | + } else { |
| 125 | + return null; |
| 126 | + } |
| 127 | + } |
| 128 | + |
| 129 | + // Convert to standard -180 to 180 range by subtracting 360 |
| 130 | + // if both longitudea are greater than 180. For example: |
| 131 | + // west south east north |
| 132 | + // 343.68, 41.8, 353.78, 49.62 becomes |
| 133 | + // -16.320007, 41.8, -6.220001, 49.62 instead |
| 134 | + // "If one of them is > 180, the domain is 0:360. |
| 135 | + // If one of them is <0, the domain is -180:180. |
| 136 | + // If both are between 0 and 180, the answer is indeterminate." |
| 137 | + // https://github.com/cf-convention/cf-conventions/issues/435#issuecomment-1505614364 |
| 138 | + // Solr only wants -180 to 180. It will throw an error for values outside this range. |
| 139 | + public WestAndEastLongitude getStandardLongitude(WestAndEastLongitude westAndEastLongitude) { |
| 140 | + if (westAndEastLongitude == null) { |
| 141 | + return null; |
| 142 | + } |
| 143 | + if (westAndEastLongitude.getWestLongitude() == null || westAndEastLongitude.getEastLongitude() == null) { |
| 144 | + return null; |
| 145 | + } |
| 146 | + float eastAsFloat; |
| 147 | + float westAsFloat; |
| 148 | + try { |
| 149 | + westAsFloat = Float.valueOf(westAndEastLongitude.getWestLongitude()); |
| 150 | + eastAsFloat = Float.valueOf(westAndEastLongitude.getEastLongitude()); |
| 151 | + } catch (NumberFormatException ex) { |
| 152 | + return null; |
| 153 | + } |
| 154 | + // "If one of them is > 180, the domain is 0:360" |
| 155 | + if (westAsFloat > 180 && eastAsFloat > 180) { |
| 156 | + Float westStandard = westAsFloat - 360; |
| 157 | + Float eastStandard = eastAsFloat - 360; |
| 158 | + WestAndEastLongitude updatedWeLong = new WestAndEastLongitude(westStandard.toString(), eastStandard.toString()); |
| 159 | + return updatedWeLong; |
| 160 | + } |
| 161 | + // "If one of them is <0, the domain is -180:180." |
| 162 | + // 180:180 is what Solr wants. Return it. |
| 163 | + if (westAsFloat < 0 || eastAsFloat < 0) { |
| 164 | + // BUT! Don't return it if the values |
| 165 | + // are so low to be out of range! |
| 166 | + // Something must be wrong with the data. |
| 167 | + if (westAsFloat < -180 || eastAsFloat < -180) { |
| 168 | + return null; |
| 169 | + } |
| 170 | + if (westAsFloat > 180 || eastAsFloat > 180) { |
| 171 | + // Not in the proper range of -80:180 |
| 172 | + return null; |
| 173 | + } |
| 174 | + return westAndEastLongitude; |
| 175 | + } |
| 176 | + if ((westAsFloat > 180 || eastAsFloat > 180) && (westAsFloat < 180 || eastAsFloat < 180)) { |
| 177 | + // One value is over 180 and the other is under 180. |
| 178 | + // We don't know if we should subtract 360 or not. |
| 179 | + // Return null to prevent inserting a potentially |
| 180 | + // incorrect bounding box. |
| 181 | + return null; |
| 182 | + } |
| 183 | + return westAndEastLongitude; |
| 184 | + } |
| 185 | + |
| 186 | + // Generates a handy link to see what the bounding box looks like on a map |
| 187 | + private String getLineStringsUrl(String west, String south, String east, String north) { |
| 188 | + // BBOX (Left (LON) ,Bottom (LAT), Right (LON), Top (LAT), comma separated, with or without decimal point): |
| 189 | + return "https://linestrings.com/bbox/#" + west + "," + south + "," + east + "," + north; |
| 190 | + } |
| 191 | + |
| 192 | +} |
0 commit comments