Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Geoip database update implementation #4105

Merged
142 changes: 43 additions & 99 deletions data-prepper-plugins/geoip-processor/build.gradle
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

plugins{
id 'de.undercouch.download' version '4.1.2'
id 'de.undercouch.download' version '5.5.0'
}
apply plugin: 'de.undercouch.download'

import de.undercouch.gradle.tasks.download.Download

/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

dependencies {
implementation project(':data-prepper-api')
implementation project(path: ':data-prepper-plugins:common')
Expand All @@ -19,131 +19,75 @@ dependencies {
implementation libs.commons.compress
implementation 'org.mapdb:mapdb:3.0.8'
implementation libs.commons.io
implementation 'software.amazon.awssdk:aws-sdk-java:2.20.67'
implementation 'software.amazon.awssdk:sts'
implementation 'software.amazon.awssdk:s3-transfer-manager'
implementation 'software.amazon.awssdk.crt:aws-crt:0.21.17'
implementation 'software.amazon.awssdk.crt:aws-crt:0.29.9'
implementation 'com.maxmind.geoip2:geoip2:4.0.1'
implementation 'com.maxmind.db:maxmind-db:3.0.0'
implementation 'org.hibernate.validator:hibernate-validator:8.0.1.Final'

implementation libs.commons.lang3

testImplementation project(':data-prepper-core')
testImplementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310'
testImplementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml'
testImplementation project(':data-prepper-test-common')
}
def geoIP2='GeoIP2'
def geoLite2= 'GeoLite2'
task downloadFile(type: Download) {

def urls = [
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-City-Test.mmdb',
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-Country-Test.mmdb',
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoLite2-ASN-Test.mmdb'
]
def mmdbFileExtension = '.mmdb'
def baseDirPath = 'src/test/resources/mmdb-file/geo-lite2/'

urls.each { url ->
src(url)
dest(baseDirPath)
doLast {

def testFileName = url.substring(url.lastIndexOf('/') + 1)
def testMmdbSubString = testFileName.substring(testFileName.lastIndexOf('-'))
def fileName = testFileName.substring(0, testFileName.length() - testMmdbSubString.length())
def downloadFiles = tasks.register('downloadFiles')

if(fileName.contains(geoIP2)) {
fileName = fileName.replace(geoIP2, geoLite2)
}
File sourceFile = file(baseDirPath+testFileName)
File destinationFile = file( baseDirPath+fileName+mmdbFileExtension)
sourceFile.renameTo(destinationFile)
def databaseNames = [
'GeoLite2-City-Test',
'GeoLite2-Country-Test',
'GeoLite2-ASN-Test'
]

}
databaseNames.forEach { databaseName -> {

def url = "https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/${databaseName}.mmdb"
def gradleName = databaseName.replaceAll('-', '')
def downloadTask = tasks.register("download${gradleName}", Download) {
src(url)
dest "build/resources/test/mmdb-files/geo-lite2/${databaseName}.mmdb"
overwrite true
}
downloadFiles.get().dependsOn downloadTask
}}

def enterpriseDatabaseNames = [
'GeoIP2-Enterprise-Test'
]

}
task downloadEnterpriseFile(type: Download) {
dependsOn downloadFile
def urls = [
'https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/GeoIP2-Enterprise-Test.mmdb'
]
def mmdbFileExtension = '.mmdb'
def baseDirPath = 'src/test/resources/mmdb-file/geo-enterprise/'

urls.each { url ->
src(url)
def testFileName = url.substring(url.lastIndexOf('/') + 1)
def testMmdbSubString = testFileName.substring(testFileName.lastIndexOf('-'))
def fileName = testFileName.substring(0, testFileName.length() - testMmdbSubString.length())

dest(baseDirPath+testFileName)
doLast {
if(fileName.contains(geoIP2)) {
fileName = fileName.replace(geoIP2, geoLite2)
}
File sourceFile = file(baseDirPath+testFileName)
File destinationFile = file( baseDirPath+fileName+mmdbFileExtension)
sourceFile.renameTo(destinationFile)
}
enterpriseDatabaseNames.forEach { enterpriseDatabaseName -> {

def url = "https://raw.githubusercontent.com/maxmind/MaxMind-DB/main/test-data/${enterpriseDatabaseName}.mmdb"
def gradleName = enterpriseDatabaseName.replaceAll('-', '')
def downloadEnterpriseTask = tasks.register("download${gradleName}", Download) {
src(url)
dest "build/resources/test/mmdb-files/geo-ip2/${enterpriseDatabaseName}.mmdb"
overwrite true
}
downloadFiles.get().dependsOn downloadEnterpriseTask
}}

}

/*task processTestResources(type: Copy) {
dependsOn downloadEnterpriseFile
from 'src/test/resources' // Source directory containing test resources
into 'build/resources/test' // Destination directory for processed test resources
}*/
tasks.test.dependsOn 'processTestResources'
tasks.processTestResources.dependsOn 'downloadEnterpriseFile'
test {
useJUnitPlatform()
dependsOn(downloadFiles)
}

checkstyleTest {
dependsOn(downloadFiles)
}

jacocoTestCoverageVerification {
dependsOn jacocoTestReport
violationRules {
rule {
limit {
minimum = 0.1 // temporarily reduce coverage for the builds to pass
minimum = 0.85
}
}
}
}

check.dependsOn jacocoTestCoverageVerification

sourceSets {
integrationTest {
java {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
srcDir file('src/integrationTest/java')
}
resources.srcDir file('src/integrationTest/resources')
}
}

configurations {
integrationTestImplementation.extendsFrom testImplementation
integrationTestRuntime.extendsFrom testRuntime
}

task integrationTest(type: Test) {
group = 'verification'
testClassesDirs = sourceSets.integrationTest.output.classesDirs

useJUnitPlatform()

classpath = sourceSets.integrationTest.runtimeClasspath
systemProperty 'tests.geoipProcessor.maxmindLicenseKey', System.getProperty('tests.geoipProcessor.maxmindLicenseKey')

filter {
includeTestsMatching '*IT'
}
}
check.dependsOn jacocoTestCoverageVerification
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void setUp() throws JsonProcessingException {

public GeoIPProcessorService createObjectUnderTest() {
// TODO: pass in geoIpServiceConfig object
return new GeoIPProcessorService(null);
return new GeoIPProcessorService(null, null, null);
}

@Test
Expand All @@ -93,7 +93,7 @@ void verify_enrichment_of_data_from_maxmind_url() throws UnknownHostException {
if (IPValidationCheck.isPublicIpAddress(ipAddress)) {
InetAddress inetAddress = InetAddress.getByName(ipAddress);
//All attributes are considered by default with the null value
geoData = geoIPProcessorService.getGeoData(inetAddress, null);
// geoData = geoIPProcessorService.getGeoData(inetAddress, null);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meant to be commented out?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't fixed these integration tests yet. I need to change this because the constructor changed.


assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("ip"), equalTo("8.8.8.8"));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor;

/**
 * Enumerates the GeoIP database types that a {@link GeoIPField} can be
 * associated with.
 *
 * <p>Declaration order is part of the enum's observable behavior
 * ({@code ordinal()}, {@code values()}); keep it stable.
 */
public enum GeoIPDatabase {
    CITY,
    COUNTRY,
    ASN,
    ENTERPRISE
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * Enumerates the GeoIP field names known to the processor, each paired with
 * the set of {@link GeoIPDatabase} types it is associated with.
 *
 * <p>Every constant carries the field's string name (see
 * {@link #getFieldName()}) and a read-only set of databases (see
 * {@link #getGeoIPDatabases()}).
 */
public enum GeoIPField {
    CONTINENT_CODE("continent_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    CONTINENT_NAME("continent_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_NAME("country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    IS_COUNTRY_IN_EUROPEAN_UNION("is_country_in_european_union", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_ISO_CODE("country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    COUNTRY_CONFIDENCE("country_confidence", GeoIPDatabase.ENTERPRISE),
    REGISTERED_COUNTRY_NAME("registered_country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REGISTERED_COUNTRY_ISO_CODE("registered_country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_NAME("represented_country_name", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_ISO_CODE("represented_country_iso_code", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    REPRESENTED_COUNTRY_TYPE("represented_country_type", GeoIPDatabase.COUNTRY, GeoIPDatabase.ENTERPRISE),
    CITY_NAME("city_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    CITY_CONFIDENCE("city_confidence", GeoIPDatabase.ENTERPRISE),
    LOCATION("location", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LATITUDE("latitude", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LONGITUDE("longitude", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LOCATION_ACCURACY_RADIUS("location_accuracy_radius", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    METRO_CODE("metro_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    TIME_ZONE("time_zone", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    POSTAL_CODE("postal_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    POSTAL_CODE_CONFIDENCE("postal_code_confidence", GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_NAME("most_specified_subdivision_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_ISO_CODE("most_specified_subdivision_iso_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    MOST_SPECIFIED_SUBDIVISION_CONFIDENCE("most_specified_subdivision_confidence", GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_NAME("least_specified_subdivision_name", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_ISO_CODE("least_specified_subdivision_iso_code", GeoIPDatabase.CITY, GeoIPDatabase.ENTERPRISE),
    LEAST_SPECIFIED_SUBDIVISION_CONFIDENCE("least_specified_subdivision_confidence", GeoIPDatabase.ENTERPRISE),

    ASN("asn", GeoIPDatabase.ASN),
    ASN_ORGANIZATION("asn_organization", GeoIPDatabase.ASN),
    NETWORK("network", GeoIPDatabase.ASN),
    IP("ip", GeoIPDatabase.ASN);

    private final Set<GeoIPDatabase> geoIPDatabases;
    private final String fieldName;

    /**
     * @param fieldName      the string name of the field
     * @param geoIPDatabases the database types this field is associated with
     */
    GeoIPField(final String fieldName, final GeoIPDatabase... geoIPDatabases) {
        this.fieldName = fieldName;
        // Enum constants are process-wide singletons, so the set handed out by
        // getGeoIPDatabases() must be read-only; otherwise any caller could
        // alter the mapping for everyone else.
        this.geoIPDatabases = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(geoIPDatabases)));
    }

    /**
     * Looks up a field by its string name, ignoring case.
     *
     * @param name the field name to search for; a {@code null} name matches nothing
     * @return the first constant (in declaration order) whose field name equals
     *         {@code name} ignoring case, or {@code null} if none matches
     */
    public static GeoIPField findByName(final String name) {
        for (final GeoIPField geoIPField : values()) {
            if (geoIPField.fieldName.equalsIgnoreCase(name)) {
                return geoIPField;
            }
        }
        return null;
    }

    /**
     * @return the string name of this field
     */
    public String getFieldName() {
        return fieldName;
    }

    /**
     * @return an unmodifiable view of the database types this field is
     *         associated with; mutation attempts throw
     *         {@link UnsupportedOperationException}
     */
    public Set<GeoIPDatabase> getGeoIPDatabases() {
        return geoIPDatabases;
    }
}
Loading
Loading