Skip to content

Commit 1733eb4

Browse files
authored
Read disambiguation concept loading URL from configuration (#57)
1 parent 5e759c1 commit 1733eb4

13 files changed

Lines changed: 236 additions & 625 deletions

README.md

Lines changed: 29 additions & 583 deletions
Large diffs are not rendered by default.

src/main/java/org/grobid/core/engines/SoftwareParser.java

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ public List<SoftwareEntity> processText(String text, boolean disambiguate) throw
340340
entities = SoftwareContextClassifier.getInstance(softwareConfiguration).classifyDocumentContexts(entities);
341341

342342
} catch (Exception e) {
343-
throw new GrobidException("An exception occured while running Grobid.", e);
343+
throw new GrobidException("An exception occurred while running Grobid.", e);
344344
}
345345

346346
return entities;
@@ -760,7 +760,7 @@ public Pair<List<SoftwareEntity>, Document> processPDF(File file,
760760
}
761761
} catch (Exception e) {
762762
throw new GrobidException(
763-
"An exception occured while running consolidation on bibliographical references.", e);
763+
"An exception occurred while running consolidation on bibliographical references.", e);
764764
}
765765

766766
// propagate the bib. ref. to the entities corresponding to the same software name without bib. ref.
@@ -825,12 +825,12 @@ public Pair<List<SoftwareEntity>, Document> processPDF(File file,
825825
*/
826826
private List<SoftwareEntity> processLayoutTokenSequence(
827827
List<LayoutToken> layoutTokens,
828-
List<SoftwareEntity> entities,
829-
boolean disambiguate,
830-
boolean addParagraphContext,
831-
boolean fromPDF,
832-
boolean fromXML,
833-
List<PDFAnnotation> pdfAnnotations
828+
List<SoftwareEntity> entities,
829+
boolean disambiguate,
830+
boolean addParagraphContext,
831+
boolean fromPDF,
832+
boolean fromXML,
833+
List<PDFAnnotation> pdfAnnotations
834834
) {
835835
List<LayoutTokenization> layoutTokenizations = new ArrayList<LayoutTokenization>();
836836
layoutTokenizations.add(new LayoutTokenization(layoutTokens));
@@ -1475,7 +1475,7 @@ public boolean accept(File dir, String name) {
14751475
String pathTEI = outputDirectory + "/" + file.getName().substring(0, file.getName().length() - 4) + ".training.tei.xml";
14761476
createTraining(file.getAbsolutePath(), pathTEI, n);
14771477
} catch (final Exception exp) {
1478-
logger.error("An error occured while processing the following pdf: "
1478+
logger.error("An error occurred while processing the following pdf: "
14791479
+ file.getPath() + ": " + exp);
14801480
}
14811481
if (ind != -1)
@@ -1484,7 +1484,7 @@ public boolean accept(File dir, String name) {
14841484

14851485
return refFiles.length;
14861486
} catch (final Exception exp) {
1487-
throw new GrobidException("An exception occured while running Grobid batch.", exp);
1487+
throw new GrobidException("An exception occurred while running Grobid batch.", exp);
14881488
}
14891489
}
14901490

@@ -1805,7 +1805,7 @@ public String addFeatures(List<LayoutToken> tokens,
18051805
isSoftwarePattern = false;
18061806
}
18071807
} catch (Exception e) {
1808-
throw new GrobidException("An exception occured while running Grobid.", e);
1808+
throw new GrobidException("An exception occurred while running Grobid.", e);
18091809
}
18101810
return result.toString();
18111811
}
@@ -2108,7 +2108,7 @@ public List<SoftwareComponent> extractSoftwareComponents(String text,
21082108

21092109
// conservative check, minimal well-formedness of the content for URL
21102110
if (clusterLabel.equals(SoftwareTaggingLabels.SOFTWARE_URL)) {
2111-
if (SoftwareAnalyzer.DELIMITERS.indexOf(clusterContent) != -1 ||
2111+
if (SoftwareAnalyzer.DELIMITERS.contains(clusterContent) ||
21122112
SoftwareLexicon.getInstance().isEnglishStopword(clusterContent) ||
21132113
FeatureFactory.getInstance().test_number(clusterContent) ||
21142114
clusterContent.replace("\n", "").equals("//")) {
@@ -2473,7 +2473,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processXML(File file,
24732473
//tei = restoreDomParserAttributeBug(tei);
24742474

24752475
} catch (final Exception exp) {
2476-
logger.error("An error occured while processing the following XML file: "
2476+
logger.error("An error occurred while processing the following XML file: "
24772477
+ file.getPath(), exp);
24782478
}
24792479

@@ -2498,7 +2498,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processTEI(File file,
24982498
//tei = restoreDomParserAttributeBug(tei);
24992499

25002500
} catch (final Exception exp) {
2501-
logger.error("An error occured while processing the following XML file: "
2501+
logger.error("An error occurred while processing the following XML file: "
25022502
+ file.getPath(), exp);
25032503
}
25042504

@@ -2532,7 +2532,7 @@ public String processXML(File file) throws Exception {
25322532
tei = FileUtils.readFileToString(new File(newFilePath), UTF_8);
25332533

25342534
} catch (final Exception exp) {
2535-
logger.error("An error occured while processing the following XML file: " + file.getAbsolutePath(), exp);
2535+
logger.error("An error occurred while processing the following XML file: " + file.getAbsolutePath(), exp);
25362536
} finally {
25372537
if (newFilePath != null) {
25382538
File newFile = new File(newFilePath);
@@ -2824,7 +2824,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processTEIDocument(org.w3c.d
28242824
}
28252825
} catch (Exception e) {
28262826
throw new GrobidException(
2827-
"An exception occured while running consolidation on bibliographical references.", e);
2827+
"An exception occurred while running consolidation on bibliographical references.", e);
28282828
}
28292829

28302830
// propagate the bib. ref. to the entities corresponding to the same software name without bib. ref.

src/main/java/org/grobid/core/utilities/ArticleUtilities.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ public static File uploadFile(String urll, String path, String name) throws Exce
201201
return outFile;
202202
}
203203
catch (Exception e) {
204-
throw new Exception("An exception occured while downloading " + urll, e);
204+
throw new Exception("An exception occurred while downloading " + urll, e);
205205
}
206206
}
207207

src/main/java/org/grobid/service/controller/SoftwareController.java

Lines changed: 118 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
import com.google.inject.Inject;
44
import com.google.inject.Singleton;
55
import jakarta.ws.rs.*;
6+
import jakarta.ws.rs.client.Client;
67
import jakarta.ws.rs.core.MediaType;
78
import jakarta.ws.rs.core.Response;
89
import org.glassfish.jersey.media.multipart.FormDataParam;
9-
import org.grobid.core.utilities.GrobidProperties;
1010
import org.grobid.core.utilities.SoftwareConfiguration;
1111
import org.grobid.core.utilities.Versioner;
1212
import org.grobid.service.configuration.SoftwareServiceConfiguration;
@@ -15,6 +15,8 @@
1515
import org.slf4j.LoggerFactory;
1616

1717
import java.io.InputStream;
18+
import java.util.Collections;
19+
import java.util.Map;
1820

1921
/**
2022
* RESTful service for GROBID Software extension.
@@ -35,9 +37,11 @@ public class SoftwareController implements SoftwarePaths {
3537
private static final String INPUT = "input";
3638

3739
private SoftwareConfiguration configuration;
40+
private final SoftwareServiceConfiguration serviceConfiguration;
41+
private final Client httpClient;
3842

3943
@Inject
40-
public SoftwareController(SoftwareServiceConfiguration serviceConfiguration) {
44+
public SoftwareController(SoftwareServiceConfiguration serviceConfiguration, Client httpClient) {
4145
/*try {
4246
ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
4347
this.configuration = mapper.readValue(new File("resources/config/config.yml"), SoftwareConfiguration.class);
@@ -46,6 +50,8 @@ public SoftwareController(SoftwareServiceConfiguration serviceConfiguration) {
4650
this.configuration = null;
4751
}*/
4852
this.configuration = serviceConfiguration.getSoftwareConfiguration();
53+
this.serviceConfiguration = serviceConfiguration;
54+
this.httpClient = httpClient;
4955
}
5056

5157
@Path(PATH_IS_ALIVE)
@@ -138,4 +144,114 @@ public ServiceInfo getVersion() {
138144
return new ServiceInfo(Versioner.getVersion(), Versioner.getRevision());
139145
}
140146

147+
// New endpoint: return concept service base URL derived from entity-fishing host/port
148+
@Path(PATH_CONFIG_CONCEPT_BASE_URL)
149+
@Produces(MediaType.APPLICATION_JSON)
150+
@GET
151+
public Response getConceptServiceBaseUrl() {
152+
String base = buildConceptBaseUrl();
153+
Map<String, String> payload = Collections.singletonMap("conceptBaseUrl", base);
154+
return Response.ok(payload).build();
155+
}
156+
157+
// New proxy endpoint: forward concept lookup using configured host/port
158+
@Path("kb/concept/{identifier}")
159+
@Produces(MediaType.APPLICATION_JSON)
160+
@GET
161+
public Response proxyKbConcept(@PathParam("identifier") String identifier, @QueryParam("lang") String lang) {
162+
String base = buildConceptBaseUrl();
163+
String sep = base.endsWith("/") ? "" : "/";
164+
String target = base + sep + identifier;
165+
if (lang != null && !lang.isEmpty()) {
166+
target = target + "?lang=" + lang;
167+
}
168+
try {
169+
String json = httpClient.target(target).request(MediaType.APPLICATION_JSON_TYPE).get(String.class);
170+
return Response.ok(json, MediaType.APPLICATION_JSON_TYPE).build();
171+
} catch (Exception e) {
172+
LOGGER.error("Error proxying concept lookup to {}", target, e);
173+
return Response.status(Response.Status.BAD_GATEWAY)
174+
.entity(Collections.singletonMap("error", "Failed to fetch concept from upstream"))
175+
.build();
176+
}
177+
}
178+
179+
// Build the concept base URL from entityFishingHost/Port, with sensible defaults
180+
private String buildConceptBaseUrl() {
181+
String host = serviceConfiguration != null ? serviceConfiguration.getEntityFishingHost() : null;
182+
String port = serviceConfiguration != null ? serviceConfiguration.getEntityFishingPort() : null;
183+
if (host == null || host.isEmpty()) {
184+
// fall back to public endpoint
185+
return "https://cloud.science-miner.com/nerd/service/kb/concept";
186+
}
187+
188+
String original = host.trim();
189+
String lower = original.toLowerCase();
190+
boolean hasScheme = lower.startsWith("http://") || lower.startsWith("https://");
191+
192+
String scheme;
193+
if (hasScheme) {
194+
scheme = lower.startsWith("https://") ? "https" : "http";
195+
} else {
196+
scheme = (port != null && ("443".equals(port) || "8443".equals(port))) ? "https" : "http";
197+
}
198+
199+
// Extract hostPart and pathPart if scheme is present
200+
String hostPart = original;
201+
String pathPart = "";
202+
if (hasScheme) {
203+
String noScheme = original.substring(original.indexOf("://") + 3);
204+
int slash = noScheme.indexOf("/");
205+
if (slash >= 0) {
206+
hostPart = noScheme.substring(0, slash);
207+
pathPart = noScheme.substring(slash); // includes leading '/'
208+
} else {
209+
hostPart = noScheme;
210+
pathPart = "";
211+
}
212+
} else {
213+
// original may already include a path like 'traces1.inria.fr/nerd'
214+
int slash = original.indexOf("/");
215+
if (slash >= 0) {
216+
hostPart = original.substring(0, slash);
217+
pathPart = original.substring(slash);
218+
} else {
219+
hostPart = original;
220+
pathPart = "";
221+
}
222+
}
223+
224+
// Append port if missing in hostPart and provided in config (and non-default for scheme)
225+
boolean hostHasPort = hostPart.contains(":");
226+
if (!hostHasPort && port != null && !port.isEmpty()) {
227+
boolean defaultForScheme = ("https".equals(scheme) && "443".equals(port)) || ("http".equals(scheme) && "80".equals(port));
228+
if (!defaultForScheme) {
229+
hostPart = hostPart + ":" + port;
230+
}
231+
}
232+
233+
// Ensure '/nerd' is present at the beginning of pathPart
234+
if (pathPart == null || pathPart.isEmpty() || !pathPart.matches("(?i)^/nerd(/.*)?$")) {
235+
// if pathPart is empty or doesn't start with '/nerd', prepend it
236+
if (pathPart == null || pathPart.isEmpty()) {
237+
pathPart = "/nerd";
238+
} else {
239+
// avoid double slashes
240+
if (!pathPart.startsWith("/")) {
241+
pathPart = "/" + pathPart;
242+
}
243+
pathPart = "/nerd" + pathPart;
244+
}
245+
}
246+
247+
// Build final base
248+
String base = scheme + "://" + hostPart;
249+
// remove trailing slash from pathPart
250+
if (pathPart.endsWith("/")) {
251+
pathPart = pathPart.substring(0, pathPart.length() - 1);
252+
}
253+
base += pathPart + "/service/kb/concept";
254+
return base;
255+
}
256+
141257
}

src/main/java/org/grobid/service/controller/SoftwarePaths.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,10 @@ public interface SoftwarePaths {
5454
public static final String PATH_SOFTWARE_CONTEXT = "characterizeSoftwareContext";
5555

5656
public static final String PATH_VERSION = "version";
57+
58+
// New path to expose concept service base URL from configuration
59+
public static final String PATH_CONFIG_CONCEPT_BASE_URL = "config/conceptBaseUrl";
60+
61+
// New path for proxying concept lookup via backend
62+
public static final String PATH_KB_CONCEPT = "kb/concept/{identifier}";
5763
}

src/main/java/org/grobid/trainer/ExportCorpusJson.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void convert() {
4545
p.parse(inputPath, handler);
4646

4747
} catch (Exception e) {
48-
throw new GrobidException("An exception occured while training GROBID.", e);
48+
throw new GrobidException("An exception occurred while training GROBID.", e);
4949
} finally {
5050
try {
5151
if (writer != null) {

src/main/java/org/grobid/trainer/SoftwareAnnotationCollectionSaxHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public void endElement(java.lang.String uri,
6565
}
6666
} catch (Exception e) {
6767
// e.printStackTrace();
68-
throw new GrobidException("An exception occured while running Grobid.", e);
68+
throw new GrobidException("An exception occurred while running Grobid.", e);
6969
}
7070
}
7171

@@ -133,7 +133,7 @@ public void startElement(String namespaceURI,
133133
}
134134
} catch (Exception e) {
135135
// e.printStackTrace();
136-
throw new GrobidException("An exception occured while running Grobid.", e);
136+
throw new GrobidException("An exception occurred while running Grobid.", e);
137137
}
138138
}
139139

src/main/java/org/grobid/trainer/SoftwareAnnotationSaxHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public void endElement(java.lang.String uri,
103103
allLabeledSoftwareMarkers.add(labeledSoftwareMarkers);
104104
}
105105
} catch (Exception e) {
106-
throw new GrobidException("An exception occured while running Grobid.", e);
106+
throw new GrobidException("An exception occurred while running Grobid.", e);
107107
}
108108
}
109109

@@ -192,7 +192,7 @@ public void startElement(String namespaceURI,
192192
}
193193
} catch (Exception e) {
194194
// e.printStackTrace();
195-
throw new GrobidException("An exception occured while running Grobid.", e);
195+
throw new GrobidException("An exception occurred while running Grobid.", e);
196196
}
197197
}
198198

src/main/java/org/grobid/trainer/SoftwareExtendedEval.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ public int createCRFPPData(final File corpusDir,
226226
}
227227
}
228228
} catch (Exception e) {
229-
throw new GrobidException("An exception occured while training GROBID.", e);
229+
throw new GrobidException("An exception occurred while training GROBID.", e);
230230
} finally {
231231
try {
232232
if (writerTraining != null)

src/main/java/org/grobid/trainer/SoftwareTrainer.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ public int createCRFPPData(final File corpusDir,
337337
}
338338
}
339339
} catch (Exception e) {
340-
throw new GrobidException("An exception occured while training GROBID.", e);
340+
throw new GrobidException("An exception occurred while training GROBID.", e);
341341
} finally {
342342
try {
343343
if (writerTraining != null)
@@ -582,7 +582,7 @@ else if (currentAnnotation.getPageNumber() > token.getPage())
582582
}
583583
}*/
584584
} catch (Exception e) {
585-
throw new GrobidException("An exception occured while training GROBID.", e);
585+
throw new GrobidException("An exception occurred while training GROBID.", e);
586586
} finally {
587587
try {
588588
if (writerTraining != null)
@@ -722,7 +722,7 @@ else if (currentAnnotation.getPageNumber() > token.getPage())
722722
}
723723
crfWriter.write("\n");
724724
} catch (Exception e) {
725-
throw new GrobidException("An exception occured while training Grobid.", e);
725+
throw new GrobidException("An exception occurred while training Grobid.", e);
726726
} finally {
727727
try {
728728
if (crfWriter != null)
@@ -815,7 +815,7 @@ static public void addFeatures(List<Pair<String, String>> texts,
815815
isSoftwarePattern = false;
816816
}
817817
} catch (Exception e) {
818-
throw new GrobidException("An exception occured while running Grobid.", e);
818+
throw new GrobidException("An exception occurred while running Grobid.", e);
819819
}
820820
}
821821

@@ -928,7 +928,7 @@ public int selectNegativeExamples(File negativeCorpusFile, double max, File outp
928928
writer.write(serialize(document, null));
929929
}
930930
} catch (Exception e) {
931-
throw new GrobidException("An exception occured while selecting negative examples.", e);
931+
throw new GrobidException("An exception occurred while selecting negative examples.", e);
932932
} finally {
933933
try {
934934
if (writer != null)
@@ -1035,7 +1035,7 @@ public int randomNegativeExamples(File negativeCorpusFile, double max, File outp
10351035
writer.write(serialize(document, null));
10361036
}
10371037
} catch (Exception e) {
1038-
throw new GrobidException("An exception occured while selecting negative examples.", e);
1038+
throw new GrobidException("An exception occurred while selecting negative examples.", e);
10391039
} finally {
10401040
try {
10411041
if (writer != null)

0 commit comments

Comments
 (0)