Skip to content

Commit

Permalink
Merge pull request #13 from nlnwa/noTextExtraction
Browse files Browse the repository at this point in the history
Remove text extraction
  • Loading branch information
maeb authored Apr 27, 2020
2 parents 7716669 + e82645c commit f218ada
Show file tree
Hide file tree
Showing 14 changed files with 220 additions and 536 deletions.
2 changes: 1 addition & 1 deletion .mvn/extensions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
<extension>
<groupId>fr.brouillard.oss</groupId>
<artifactId>jgitver-maven-plugin</artifactId>
<version>1.4.4</version>
<version>1.5.1</version>
</extension>
</extensions>
69 changes: 30 additions & 39 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,17 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<docker.tag>latest</docker.tag>

<veidemann.version>0.3.3</veidemann.version>
<veidemann.api.version>1.0.0-beta12</veidemann.api.version>
<veidemann.api.version>1.0.0-beta14</veidemann.api.version>
<veidemann.commons.version>0.4.1</veidemann.commons.version>
<veidemann.rethinkdbadapter.version>0.4.5</veidemann.rethinkdbadapter.version>
<org.apache.tika.version>1.22</org.apache.tika.version>
<org.jwat.version>1.1.1</org.jwat.version>
<log4j.version>2.7</log4j.version>
<io.grpc.version>1.20.0</io.grpc.version>
<rethinkdb.version>2.3.6</rethinkdb.version>
<rethinkdb.version>2.4.0</rethinkdb.version>

<!--suppress UnresolvedMavenProperty -->
<docker.tag>${env.DOCKER_TAG}</docker.tag>
<!--suppress UnresolvedMavenProperty -->
<docker.username>${env.DOCKER_USERNAME}</docker.username>
<!--suppress UnresolvedMavenProperty -->
Expand Down Expand Up @@ -88,34 +87,18 @@
<version>${org.jwat.version}</version>
</dependency>

<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>${org.apache.tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>${org.apache.tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-langdetect</artifactId>
<version>${org.apache.tika.version}</version>
</dependency>

<!-- Configuration framework -->
<dependency>
<groupId>com.typesafe</groupId>
<artifactId>config</artifactId>
<version>1.3.3</version>
<version>1.4.0</version>
</dependency>

<!-- Do all logging through Log4j -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.21</version>
<version>1.7.26</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
Expand Down Expand Up @@ -180,7 +163,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
<version>3.2.0</version>
</plugin>
<plugin>
<groupId>com.google.cloud.tools</groupId>
Expand All @@ -191,6 +174,7 @@
</pluginManagement>

<plugins>
<!-- Make sure we are running tests sequentially -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
Expand All @@ -217,13 +201,6 @@
<configuration>
<to>
<image>docker.io/norsknettarkiv/${project.artifactId}</image>
<tags>
<tag>${docker.tag}</tag>
</tags>
<auth>
<username>${docker.username}</username>
<password>${docker.password}</password>
</auth>
</to>
<container>
<ports>
Expand Down Expand Up @@ -282,6 +259,17 @@
</goals>
</execution>
</executions>
<configuration>
<to>
<tags>
<tag>${docker.tag}</tag>
</tags>
<auth>
<username>${docker.username}</username>
<password>${docker.password}</password>
</auth>
</to>
</configuration>
</plugin>
</plugins>
</build>
Expand All @@ -299,7 +287,6 @@
<id>prepare-tests</id>
<phase>pre-integration-test</phase>
<goals>
<!--<goal>build</goal>-->
<goal>volume-create</goal>
<goal>start</goal>
</goals>
Expand All @@ -321,7 +308,7 @@
<removeVolumes>true</removeVolumes>
<autoCreateCustomNetworks>true</autoCreateCustomNetworks>
<verbose>false</verbose>
<startParallel>false</startParallel>
<startParallel>true</startParallel>
<volumes>
<volume>
<name>contentwriter-rethink-data</name>
Expand Down Expand Up @@ -364,13 +351,18 @@
<enabled>true</enabled>
</log>
<wait>
<log>Server ready.*</log>
<time>100000</time>
<tcp>
<ports>
<port>8080</port>
<port>28015</port>
<port>29015</port>
</ports>
</tcp>
</wait>
</run>
</image>
<image>
<name>norsknettarkiv/veidemann-db-initializer</name>
<name>norsknettarkiv/veidemann-db-initializer:${veidemann.rethinkdbadapter.version}</name>
<alias>db-initializer</alias>
<run>
<network>
Expand All @@ -384,14 +376,13 @@
<env>
<DB_HOST>db</DB_HOST>
<DB_USER>admin</DB_USER>
<DB_PASSWORD></DB_PASSWORD>
</env>
<log>
<enabled>true</enabled>
</log>
<wait>
<log>DB initialized</log>
<time>60000</time>
<exit>0</exit>
<time>20000</time>
</wait>
</run>
</image>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import io.grpc.ServerBuilder;
import io.opentracing.contrib.ServerTracingInterceptor;
import io.opentracing.util.GlobalTracer;
import no.nb.nna.veidemann.contentwriter.text.TextExtractor;
import no.nb.nna.veidemann.contentwriter.warc.WarcCollectionRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -44,12 +43,12 @@ public class ApiServer implements AutoCloseable {
/**
* Construct a new gRPC API server.
*/
public ApiServer(int port, int shutdownTimeoutSeconds, WarcCollectionRegistry warcCollectionRegistry, TextExtractor textExtractor) {
this(ServerBuilder.forPort(port), warcCollectionRegistry, textExtractor);
public ApiServer(int port, int shutdownTimeoutSeconds, WarcCollectionRegistry warcCollectionRegistry) {
this(ServerBuilder.forPort(port), warcCollectionRegistry);
this.shutdownTimeoutSeconds = shutdownTimeoutSeconds;
}

public ApiServer(ServerBuilder<?> serverBuilder, WarcCollectionRegistry warcCollectionRegistry, TextExtractor textExtractor) {
public ApiServer(ServerBuilder<?> serverBuilder, WarcCollectionRegistry warcCollectionRegistry) {

ServerTracingInterceptor tracingInterceptor = new ServerTracingInterceptor.Builder(GlobalTracer.get())
.withTracedAttributes(ServerTracingInterceptor.ServerRequestAttribute.CALL_ATTRIBUTES,
Expand All @@ -61,7 +60,7 @@ public ApiServer(ServerBuilder<?> serverBuilder, WarcCollectionRegistry warcColl
threadPool = Executors.newCachedThreadPool();
serverBuilder.executor(threadPool);

server = serverBuilder.addService(new ContentWriterService(warcCollectionRegistry, textExtractor)).build();
server = serverBuilder.addService(new ContentWriterService(warcCollectionRegistry)).build();
}

public ApiServer start() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import no.nb.nna.veidemann.commons.db.DbService;
import no.nb.nna.veidemann.commons.opentracing.TracerFactory;
import no.nb.nna.veidemann.contentwriter.settings.Settings;
import no.nb.nna.veidemann.contentwriter.text.TextExtractor;
import no.nb.nna.veidemann.contentwriter.warc.WarcCollectionRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -60,8 +59,7 @@ public ContentWriter() {
public ContentWriter start() {
try (DbService db = DbService.configure(SETTINGS);
WarcCollectionRegistry warcCollectionRegistry = new WarcCollectionRegistry();
TextExtractor textExtractor = new TextExtractor();
ApiServer apiServer = new ApiServer(SETTINGS.getApiPort(), SETTINGS.getTerminationGracePeriodSeconds(), warcCollectionRegistry, textExtractor);) {
ApiServer apiServer = new ApiServer(SETTINGS.getApiPort(), SETTINGS.getTerminationGracePeriodSeconds(), warcCollectionRegistry)) {

registerShutdownHook();

Expand Down Expand Up @@ -89,15 +87,7 @@ private void registerShutdownHook() {
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
// Use stderr here since the logger may have been reset by its JVM shutdown hook.
System.err.println("*** shutting down since JVM is shutting down");

mainThread.interrupt();
try {
mainThread.join();
} catch (InterruptedException e) {
//
}
System.err.println("*** gracefully shut down");

}));
}

Expand Down
Loading

0 comments on commit f218ada

Please sign in to comment.