Skip to content

Commit

Permalink
Allow missing English Resource attributes, logging changes in Endpoin…
Browse files Browse the repository at this point in the history
…tDescription parsing

- Change log levels to warn on EndpointDescription parsing
- Add more debug logs of parsed elements
- Refactor some code
  • Loading branch information
Querela committed Feb 2, 2024
1 parent 52a1599 commit e0df3d7
Showing 1 changed file with 73 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public class ClarinFCSEndpointDescriptionParser implements

// Log under this parser's own class so its messages are attributed to (and can
// be filtered by) ClarinFCSEndpointDescriptionParser rather than the client builder.
private static final Logger logger = LoggerFactory.getLogger(ClarinFCSEndpointDescriptionParser.class);
private static final String ED_NS_URI = "http://clarin.eu/fcs/endpoint-description";
private static final String ED_NS_LEGACY_URI = "http://clarin.eu/fcs/1.0/resource-info";
private static final QName ED_ROOT_ELEMENT = new QName(ED_NS_URI, "EndpointDescription");
private static final int VERSION_1 = 1;
private static final int VERSION_2 = 2;
Expand Down Expand Up @@ -162,6 +163,7 @@ public SRUExtraResponseData parse(XMLStreamReader reader)
logger.debug("parsing with xpath");
final Document erdDoc = XmlStreamReaderUtils.parseToDocument(reader);
try {
checkLegacyMode(erdDoc);
return parseEndpointDescription(erdDoc, maxDepth);
} catch (XPathExpressionException e) {
throw new SRUClientException("internal error", e);
Expand All @@ -170,8 +172,45 @@ public SRUExtraResponseData parse(XMLStreamReader reader)
}


/**
* Get the maximum resource enumeration parsing depth. The first level is
* indicated by the value <code>0</code>.
*
* @return the maximum resource parsing depth or <code>-1</code> for
* infinite.
*/
public int getMaximumResourceParsingDepth() {
return maxDepth;
}


// -----------------------------------------------------------------------

/**
 * Verify that the root element of the parsed document is in the supported
 * Endpoint Description namespace ({@code ED_NS_URI}).
 *
 * <p>Documents using the legacy namespace ({@code ED_NS_LEGACY_URI}) or any
 * other namespace are logged at error level and rejected.</p>
 *
 * @param doc the parsed document to check
 * @throws SRUClientException if the document has no root element, if the
 *         root element has no namespace URI, or if the namespace URI is
 *         the legacy one or otherwise not {@code ED_NS_URI}
 */
private static void checkLegacyMode(Document doc)
        throws SRUClientException {
    final Element root = doc.getDocumentElement();
    if (root == null) {
        throw new SRUClientException("Error retrieving root element");
    }

    final String ns = root.getNamespaceURI();
    if (ns == null) {
        throw new SRUClientException("No namespace URI was detected " +
                "for resource info catalog file!");
    }

    // The legacy namespace gets its own log message so operators know an
    // update of the endpoint is required, not just that parsing failed.
    if (ED_NS_LEGACY_URI.equals(ns)) {
        logger.error("Detected out-dated resource info catalog file." +
                " Update to the current version is required");
        throw new SRUClientException("Unsupported file format: " + ns);
    }

    if (!ED_NS_URI.equals(ns)) {
        logger.error("Detected unsupported resource info catalog file " +
                "with namespace '{}'", ns);
        throw new SRUClientException("Unsupported file format: " + ns);
    }
}


private static ClarinFCSEndpointDescription parseEndpointDescription(Document doc, int maxDepth)
throws SRUClientException, XPathExpressionException {
XPathFactory factory = XPathFactory.newInstance();
Expand Down Expand Up @@ -221,27 +260,29 @@ public String getNamespaceURI(String prefix) {
if (version == -1) {
throw new SRUClientException("Attribute @version missing on element <EndpointDescription>");
}
logger.debug("Endpoint description version is {}", version);

// capabilities
List<URI> capabilities = parseCapabilities(xpath, doc, version);
final boolean hasBasicSearch = (capabilities.indexOf(ClarinFCSConstants.CAPABILITY_BASIC_SEARCH) != -1);
final boolean hasAdvancedSearch = (capabilities.indexOf(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH) != -1);
logger.debug("CAP: {}", capabilities);

// used to check for uniqueness of id attribute
final Set<String> xml_ids = new HashSet<>();

// SupportedDataViews
List<DataView> supportedDataViews = parseSupportedDataViews(xpath, doc, capabilities, xml_ids);
logger.debug("DV: {}", supportedDataViews);

// SupportedLayers
List<Layer> supportedLayers = parseSupportedLayers(xpath, doc, capabilities, xml_ids);
logger.debug("L: {}", supportedLayers);

// Resources
exp = xpath.compile("/ed:EndpointDescription/ed:Resources/ed:Resource");
NodeList list = (NodeList) exp.evaluate(doc, XPathConstants.NODESET);
final Set<String> pids = new HashSet<>();
List<ResourceInfo> resources = parseResources(xpath, list, 0, maxDepth, pids,
supportedDataViews, supportedLayers, version, hasAdvancedSearch);
supportedDataViews, supportedLayers, version, capabilities);
if ((resources == null) || resources.isEmpty()) {
throw new SRUClientException("No resources where defined in endpoint description");
}
Expand Down Expand Up @@ -362,6 +403,7 @@ private static List<DataView> parseSupportedDataViews(XPath xpath, Document doc,
"no valid information about supported data views");
}

final boolean hasAdvancedSearch = capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH);
boolean hasHitsView = false;
boolean hasAdvView = false;
if (supportedDataViews != null) {
Expand All @@ -379,7 +421,7 @@ private static List<DataView> parseSupportedDataViews(XPath xpath, Document doc,
MIMETYPE_HITS_DATAVIEW +
"') to conform to CLARIN-FCS specification");
}
if (capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH) && !hasAdvView) {
if (hasAdvancedSearch && !hasAdvView) {
throw new SRUClientException("Endpoint claimes to support " +
"Advanced FCS but does not declare Advanced Data View (" +
MIMETYPE_ADV_DATAVIEW + ") in <SupportedDataViews>");
Expand Down Expand Up @@ -483,15 +525,15 @@ private static List<Layer> parseSupportedLayers(XPath xpath, Document doc,
}
}

if (capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH) && (supportedLayers == null)) {
final boolean hasAdvancedSearch = capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH);
if (hasAdvancedSearch && (supportedLayers == null)) {
throw new SRUClientException("Endpoint must declare " +
"all supported layers (<SupportedLayers>) if they " +
"provide the 'advanced-search' (" +
ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH +
") capability");
}
if ((supportedLayers != null) &&
!capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH)) {
if ((supportedLayers != null) && !hasAdvancedSearch) {
logger.warn("Endpoint description has <SupportedLayer> but " +
"does not indicate support for Advanced Search using " +
"the capability ({})!",
Expand All @@ -504,8 +546,9 @@ private static List<Layer> parseSupportedLayers(XPath xpath, Document doc,

private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
int depth, int maxDepth, Set<String> pids, List<DataView> supportedDataViews,
List<Layer> supportedLayers, int version, boolean hasAdv)
List<Layer> supportedLayers, int version, List<URI> capabilities)
throws SRUClientException, XPathExpressionException {
final boolean hasAdvancedSearch = capabilities.contains(ClarinFCSConstants.CAPABILITY_ADVANCED_SEARCH);
List<ResourceInfo> ris = null;

for (int k = 0; k < nodes.getLength(); k++) {
Expand All @@ -530,11 +573,10 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
"with pid '" + pid + "' already exists");
}
pids.add(pid);
logger.debug("Processing resource with pid '{}'", pid);
logger.debug("Processing resource with pid '{}' at level {}", pid, depth);

XPathExpression exp = xpath.compile("ed:Title");
NodeList list = (NodeList) exp.evaluate(node,
XPathConstants.NODESET);
NodeList list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
if ((list != null) && (list.getLength() > 0)) {
for (int i = 0; i < list.getLength(); i++) {
final Element n = (Element) list.item(i);
Expand All @@ -555,16 +597,17 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
titles = new HashMap<>();
}
if (titles.containsKey(lang)) {
logger.debug("title with language '{}' already exists",
logger.warn("A <Title> with language '{}' already exists",
lang);
} else {
titles.put(lang, title);
}
}
if ((titles != null) && !titles.containsKey(LANG_EN)) {
throw new SRUClientException("A <Title> with language 'en' is mandatory");
logger.warn("A <Title> with language 'en' is mandatory");
}
}
logger.debug("Title: {}", titles);

exp = xpath.compile("ed:Description");
list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
Expand All @@ -585,16 +628,17 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
}

if (descrs.containsKey(lang)) {
logger.debug("description with language '{}' " +
logger.warn("A <Description> with language '{}' " +
"already exists", lang);
} else {
descrs.put(lang, desc);
}
}
if ((descrs != null) && !descrs.containsKey(LANG_EN)) {
throw new SRUClientException("A <Description> with language 'en' is mandatory");
logger.warn("A <Description> with language 'en' is mandatory");
}
}
logger.debug("Description: {}", descrs);

exp = xpath.compile("ed:Institution");
list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
Expand All @@ -615,16 +659,17 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
}

if (insts.containsKey(lang)) {
logger.debug("institution with language '{}' " +
logger.warn("An <Institution> with language '{}' " +
"already exists", lang);
} else {
insts.put(lang, inst);
}
}
if ((insts != null) && !insts.containsKey(LANG_EN)) {
throw new SRUClientException("A <Institution> with language 'en' is mandatory");
logger.warn("An <Institution> with language 'en' is mandatory");
}
}
logger.debug("Institution: {}", insts);

exp = xpath.compile("ed:LandingPageURI");
list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
Expand All @@ -634,6 +679,7 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
link = cleanString(n.getTextContent());
}
}
logger.debug("LandingPageURI: {}", link);

exp = xpath.compile("ed:Languages/ed:Language");
list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
Expand Down Expand Up @@ -663,6 +709,7 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
langs.add(s);
}
}
logger.debug("Languages: {}", langs);

exp = xpath.compile("ed:AvailableDataViews");
Node n = (Node) exp.evaluate(node, XPathConstants.NODE);
Expand Down Expand Up @@ -698,13 +745,13 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
}
}
} else {
throw new SRUClientException(
"missing element <AvailableDataViews>");
throw new SRUClientException("Missing element <AvailableDataViews>");
}
if (availableDataViews == null) {
throw new SRUClientException("No available data views were " +
"defined for resource with PID '" + pid + "'");
}
logger.debug("DataViews: {}", availableDataViews);

exp = xpath.compile("ed:AvailableLayers");
n = (Node) exp.evaluate(node, XPathConstants.NODE);
Expand Down Expand Up @@ -740,21 +787,24 @@ private static List<ResourceInfo> parseResources(XPath xpath, NodeList nodes,
}
}
} else {
if (hasAdv) {
logger.debug("no <SupportedLayers> for resource '{}'", pid);
if (hasAdvancedSearch) {
logger.debug("No <SupportedLayers> for resource '{}'", pid);
}
}
logger.debug("Layers: {}", availableLayers);

final int nextDepth = depth + 1;
if ((maxDepth == INFINITE_MAX_DEPTH) || (nextDepth < maxDepth)) {
exp = xpath.compile("ed:Resources/ed:Resource");
list = (NodeList) exp.evaluate(node, XPathConstants.NODESET);
if ((list != null) && (list.getLength() > 0)) {
sub = parseResources(xpath, list, depth + 1, maxDepth, pids, supportedDataViews,
supportedLayers, version, hasAdv);
supportedLayers, version, capabilities);
}
}

// Extensions (skipped) ...

if (ris == null) {
ris = new ArrayList<>();
}
Expand Down Expand Up @@ -1024,18 +1074,6 @@ private static ClarinFCSEndpointDescription parseEndpointDescription(XMLStreamRe
}


/**
* Get the maximum resource enumeration parsing depth. The first level is
* indicated by the value <code>0</code>.
*
* @return the maximum resource parsing depth or <code>-1</code> for
* infinite.
*/
public int getMaximumResourceParsingDepth() {
return maxDepth;
}


private static List<ResourceInfo> parseResources(XMLStreamReader reader,
int depth, int maxDepth, boolean hasAdvancedSearch,
List<DataView> supportedDataviews, List<Layer> supportedLayers)
Expand Down Expand Up @@ -1146,7 +1184,7 @@ private static List<ResourceInfo> parseResources(XMLStreamReader reader,
}
} // for
logger.debug("Layers: {}", layers);
} // for
}
if (hasAdvancedSearch && (layers == null)) {
throw new XMLStreamException("Endpoint must declare " +
"all available layers (<AvailableLayers>) on a " +
Expand Down

0 comments on commit e0df3d7

Please sign in to comment.