Skip to content

Commit

Permalink
Feature/exclude paths in curation (#206)
Browse files Browse the repository at this point in the history
* First draft. Add excludedPaths property and inject ComponentInfoCurator into UncuratedScancodeComponentInfoProvider

* New approach using SingleFileCurationProvider

* Fix typo

* Add Unit Tests

* Add feature description and release note.

* Fix NullPointerException

* Revert scancode property to default false

* Fix typo and add github issue to release note

* Use CurationsProvider, Add curationDataSelector, Move exclusion logic from loop, Format code

* Add Unit Test

* Cleanup and Formatting

* minor cleanup

* Renamed interfaces/classes from "Uncurated..." to "Filtered..." to align with changed semantics

---------

Co-authored-by: ohecker <[email protected]>
  • Loading branch information
duph97 and ohecker authored Oct 24, 2023
1 parent 6bfc85c commit eae3310
Show file tree
Hide file tree
Showing 14 changed files with 255 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public ComponentInfoCuratorImpl(CurationProvider curationProvider,

/**
* Checks for the existence of curation for the given package via the {@link CurationProvider}. If curations exist
* then a new curated {@link ComponentInfo} instance will be created from the incoming uncurated {@link ComponentInfo}
* then a new curated {@link ComponentInfo} instance will be created from the incoming filtered {@link ComponentInfo}
* and the curation.
*
* @param componentInfo the componentInfo to curate
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,26 @@
import com.devonfw.tools.solicitor.componentinfo.ComponentInfoAdapterException;

/**
* A {@link ComponentInfoAdapter} which takes uncurated {@link ComponentInfo} data from the configuret
* {@link UncuratedComponentInfoProvider} and curates it via the given {@link ComponentInfoCurator}.
* A {@link ComponentInfoAdapter} which takes filtered {@link ComponentInfo} data from the configured
* {@link FilteredComponentInfoProvider} and curates it via the given {@link ComponentInfoCurator}.
*
*/
public class CuratingComponentInfoAdapter implements ComponentInfoAdapter {

private UncuratedComponentInfoProvider uncuratedComponentInfoProvider;
private FilteredComponentInfoProvider filteredComponentInfoProvider;

private ComponentInfoCurator componentInfoCurator;

/**
* The constructor.
*
* @param uncuratedComponentInfoProvider the provider of the uncurated {@link ComponentInfo} data
* @param filteredComponentInfoProvider the provider of the filtered {@link ComponentInfo} data
* @param componentInfoCurator the curator to take
*/
public CuratingComponentInfoAdapter(UncuratedComponentInfoProvider uncuratedComponentInfoProvider,
public CuratingComponentInfoAdapter(FilteredComponentInfoProvider filteredComponentInfoProvider,
ComponentInfoCurator componentInfoCurator) {

this.uncuratedComponentInfoProvider = uncuratedComponentInfoProvider;
this.filteredComponentInfoProvider = filteredComponentInfoProvider;
this.componentInfoCurator = componentInfoCurator;
}

Expand All @@ -46,7 +46,7 @@ public ComponentInfo getComponentInfo(String packageUrl, String curationDataSele

if (isFeatureActive()) {

ComponentInfo componentInfo = this.uncuratedComponentInfoProvider.getComponentInfo(packageUrl,
ComponentInfo componentInfo = this.filteredComponentInfoProvider.getComponentInfo(packageUrl,
curationDataSelector);
if (componentInfo == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package com.devonfw.tools.solicitor.componentinfo.curation;

import com.devonfw.tools.solicitor.componentinfo.ComponentInfo;
import com.devonfw.tools.solicitor.componentinfo.ComponentInfoProvider;

/**
* A {@link ComponentInfoProvider} which provides filtered {@link ComponentInfo}s. Filtered {@link ComponentInfo} is
* not yet fully curated, but information which applies to portions of the original scanned code that should be
* disregarded has already been removed from it.
* <p>
* This interface declares no methods of its own; it only gives providers which deliver data in this filtered state a
* distinct type.
*
*/
public interface FilteredComponentInfoProvider extends ComponentInfoProvider {

}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ public class ComponentInfoCuration {

private List<LicenseInfoCuration> licenses;

private List<String> excludedPaths;

/**
* The constructor.
*/
Expand Down Expand Up @@ -108,4 +110,20 @@ public void setLicenses(List<LicenseInfoCuration> licenses) {
this.licenses = licenses;
}

/**
* Returns the paths which are excluded by this curation.
*
* @return the list of excluded paths as last set via {@link #setExcludedPaths(List)}; presumably {@code null} if it
* was never set, as the field declaration has no initializer - TODO confirm against the constructor
*/
public List<String> getExcludedPaths() {

return this.excludedPaths;
}

/**
* Sets the paths which are excluded by this curation. The given list is stored as is, without copying it.
*
* @param excludedPaths new value of {@link #getExcludedPaths}.
*/
public void setExcludedPaths(List<String> excludedPaths) {

this.excludedPaths = excludedPaths;
}

}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
*
*/
@Component
public class FileScancodeRawComponentInfoProvider implements ScancodeRawComponentInfoPovider {
public class FileScancodeRawComponentInfoProvider implements ScancodeRawComponentInfoProvider {

/**
* The directory within the component root directory which contains the sources / the content
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.devonfw.tools.solicitor.componentinfo.scancode;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
Expand All @@ -12,26 +13,26 @@
import com.devonfw.tools.solicitor.common.LogMessages;
import com.devonfw.tools.solicitor.common.packageurl.AllKindsPackageURLHandler;
import com.devonfw.tools.solicitor.componentinfo.ComponentInfoAdapterException;
import com.devonfw.tools.solicitor.componentinfo.curation.UncuratedComponentInfoProvider;
import com.devonfw.tools.solicitor.componentinfo.curation.CurationProvider;
import com.devonfw.tools.solicitor.componentinfo.curation.FilteredComponentInfoProvider;
import com.devonfw.tools.solicitor.componentinfo.curation.model.ComponentInfoCuration;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.github.packageurl.PackageURL;

/**
* {@link UncuratedComponentInfoProvider} which delivers data based on scancode data.
* {@link FilteredComponentInfoProvider} which delivers data based on scancode data.
*
*/
@Component
public class UncuratedScancodeComponentInfoProvider implements UncuratedComponentInfoProvider {
public class FilteredScancodeComponentInfoProvider implements FilteredComponentInfoProvider {

private static final Logger LOG = LoggerFactory.getLogger(UncuratedScancodeComponentInfoProvider.class);
private static final Logger LOG = LoggerFactory.getLogger(FilteredScancodeComponentInfoProvider.class);

private static final ObjectMapper mapper = new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);

private String repoBasePath;

private double minLicenseScore;

private int minLicensefileNumberOfLines;
Expand All @@ -40,34 +41,28 @@ public class UncuratedScancodeComponentInfoProvider implements UncuratedComponen

private AllKindsPackageURLHandler packageURLHandler;

private ScancodeRawComponentInfoPovider fileScancodeRawComponentInfoProvider;
private ScancodeRawComponentInfoProvider fileScancodeRawComponentInfoProvider;

private CurationProvider curationProvider;

/**
* The constructor.
*
* @param fileScancodeRawComponentInfoProvider the provider for the raw scancode data
* @param packageURLHandler the handler for dealing with {@link PackageURL}s.
* @param curationProvider for getting the filter information used for filtering findings based on the paths in the
* code
*/
@Autowired
public UncuratedScancodeComponentInfoProvider(ScancodeRawComponentInfoPovider fileScancodeRawComponentInfoProvider,
AllKindsPackageURLHandler packageURLHandler) {
public FilteredScancodeComponentInfoProvider(ScancodeRawComponentInfoProvider fileScancodeRawComponentInfoProvider,
AllKindsPackageURLHandler packageURLHandler, CurationProvider curationProvider) {

this.fileScancodeRawComponentInfoProvider = fileScancodeRawComponentInfoProvider;
this.packageURLHandler = packageURLHandler;
this.curationProvider = curationProvider;

}

/**
* Sets repoBasePath.
*
* @param repoBasePath new value of repoBasePath.
*/
@Value("${solicitor.scancode.repo-base-path}")
public void setRepoBasePath(String repoBasePath) {

this.repoBasePath = repoBasePath;
}

/**
* Sets minLicenseScore.
*
Expand Down Expand Up @@ -109,7 +104,8 @@ public ScancodeComponentInfo getComponentInfo(String packageUrl, String curation
return null;
}

ScancodeComponentInfo componentScancodeInfos = parseAndMapScancodeJson(packageUrl, rawScancodeData);
ScancodeComponentInfo componentScancodeInfos = parseAndMapScancodeJson(packageUrl, rawScancodeData,
curationDataSelector);
addSupplementedData(rawScancodeData, componentScancodeInfos);
LOG.debug("Scancode info for package {}: {} license, {} copyrights, {} NOTICE files", packageUrl,
componentScancodeInfos.getLicenses().size(), componentScancodeInfos.getCopyrights().size(),
Expand All @@ -135,28 +131,41 @@ private void addSupplementedData(ScancodeRawComponentInfo rawScancodeData,
* @return
* @throws ComponentInfoAdapterException
*/
private ScancodeComponentInfo parseAndMapScancodeJson(String packageUrl, ScancodeRawComponentInfo rawScancodeData)
throws ComponentInfoAdapterException {
private ScancodeComponentInfo parseAndMapScancodeJson(String packageUrl, ScancodeRawComponentInfo rawScancodeData,
String curationDataSelector) throws ComponentInfoAdapterException {

ScancodeComponentInfo componentScancodeInfos = new ScancodeComponentInfo(this.minLicenseScore,
this.minLicensefileNumberOfLines);
componentScancodeInfos.setPackageUrl(packageUrl);

// Get the curation for a given packageUrl
ComponentInfoCuration componentInfoCuration = this.curationProvider.findCurations(packageUrl, curationDataSelector);

// Get all excludedPaths in this curation
List<String> excludedPaths = null;
if (componentInfoCuration != null) {
excludedPaths = componentInfoCuration.getExcludedPaths();
}

JsonNode scancodeJson;
try {
scancodeJson = mapper.readTree(rawScancodeData.rawScancodeResult);
} catch (JsonProcessingException e) {
throw new ComponentInfoAdapterException("Could not parse Scancode JSON", e);
}

// Skip all files whose path starts with one of the excluded path prefixes
for (JsonNode file : scancodeJson.get("files")) {
String path = file.get("path").asText();
if (isExcluded(path, excludedPaths)) {
continue;
}
if ("directory".equals(file.get("type").asText())) {
continue;
}
if (file.get("path").asText().contains("/NOTICE")) {
componentScancodeInfos.addNoticeFileUrl(
this.fileScancodeRawComponentInfoProvider.pkgContentUriFromPath(packageUrl, file.get("path").asText()),
100.0);
if (path.contains("/NOTICE")) {
componentScancodeInfos
.addNoticeFileUrl(this.fileScancodeRawComponentInfoProvider.pkgContentUriFromPath(packageUrl, path), 100.0);
}
double licenseTextRatio = file.get("percentage_of_license_text").asDouble();
boolean takeCompleteFile = licenseTextRatio >= this.licenseToTextRatioToTakeCompleteFile;
Expand Down Expand Up @@ -216,7 +225,7 @@ private ScancodeComponentInfo parseAndMapScancodeJson(String packageUrl, Scancod
String licenseDefaultUrl = li.get("scancode_text_url").asText();
licenseDefaultUrl = normalizeLicenseUrl(packageUrl, licenseDefaultUrl);
double score = li.get("score").asDouble();
String licenseUrl = file.get("path").asText();
String licenseUrl = path;
int startLine = li.get("start_line").asInt();
int endLine = li.get("end_line").asInt();
if (!takeCompleteFile) {
Expand Down Expand Up @@ -267,4 +276,22 @@ private String normalizeLicenseUrl(String packageUrl, String licenseUrl) {
return adjustedLicenseUrl;
}

/**
* Checks whether the given path starts with any of the excluded path prefixes.
*
* @param path the path of a file as given in the scancode data
* @param excludedPaths all excluded path prefixes defined in the curation; might be {@code null}, which is treated
* like an empty list. {@code null} entries within the list are ignored.
* @return {@code true} if the path starts with at least one of the excluded path prefixes, {@code false} otherwise
*/
private boolean isExcluded(String path, List<String> excludedPaths) {

  if (excludedPaths == null) {
    // no exclusions defined at all
    return false;
  }
  for (String excludedPath : excludedPaths) {
    // String#startsWith throws a NullPointerException for a null prefix, so a null entry in the list is
    // skipped instead of aborting the processing of the whole component
    if (excludedPath != null && path.startsWith(excludedPath)) {
      return true;
    }
  }
  return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ public class ScancodeComponentInfoAdapter extends CuratingComponentInfoAdapter {
/**
* The constructor.
*
* @param uncuratedScancodeComponentInfoProvider provider for uncurated data originating from scancode data
* @param filteredScancodeComponentInfoProvider provider for filtered data originating from scancode data
* @param componentInfoCurator the curator to use
*/
@Autowired
public ScancodeComponentInfoAdapter(UncuratedScancodeComponentInfoProvider uncuratedScancodeComponentInfoProvider,
public ScancodeComponentInfoAdapter(FilteredScancodeComponentInfoProvider filteredScancodeComponentInfoProvider,
ComponentInfoCurator componentInfoCurator) {

super(uncuratedScancodeComponentInfoProvider, componentInfoCurator);
super(filteredScancodeComponentInfoProvider, componentInfoCurator);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* Provider for {@link ScancodeRawComponentInfo}
*
*/
public interface ScancodeRawComponentInfoPovider extends ComponentContentProvider {
public interface ScancodeRawComponentInfoProvider extends ComponentContentProvider {

/**
* Retrieve the {@link ScancodeRawComponentInfo} for the package given by its PackageURL.
Expand Down
Loading

0 comments on commit eae3310

Please sign in to comment.