Skip to content

Commit

Permalink
init commit
Browse files Browse the repository at this point in the history
  • Loading branch information
nobaksan committed Apr 10, 2018
1 parent c3576d2 commit 308d801
Show file tree
Hide file tree
Showing 10 changed files with 462 additions and 0 deletions.
63 changes: 63 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Created by .ignore support plugin (hsz.mobi)
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.idea/**


# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/
cmake-build-release/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

.idea/compiler.xml
.idea/copyright/
.idea/libraries/
.idea/markdown-navigator/
.idea/misc.xml
.idea/modules.xml
.idea/preferred-vcs.xml
.idea/workspace.xml
elasticsearch-merge-token-filter.iml
65 changes: 65 additions & 0 deletions .ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Created by .ignore support plugin (hsz.mobi)
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
.idea/**


# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/
cmake-build-release/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

.idea/compiler.xml
.idea/copyright/
.idea/libraries/
.idea/markdown-navigator/
.idea/misc.xml
.idea/modules.xml
.idea/preferred-vcs.xml
.idea/workspace.xml
.idea/encodings.xml
.idea/vcs.xml
elasticsearch-merge-token-filter.iml
6 changes: 6 additions & 0 deletions plugin-descriptor.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Elasticsearch plugin descriptor.
# The assembly descriptor (src/main/assemblies/plugin.xml) copies this file into the plugin zip
# with filtering enabled, so the placeholders below are replaced at build time with values
# from pom.xml. Keep placeholder syntax out of these comments: filtering is applied to the whole file.

# Human-readable summary of the plugin.
description=Merge token filter
# NOTE(review): the plugin version is taken from the Elasticsearch version property (6.1.1 in pom.xml),
# not from the Maven project version (6.1.0) - confirm this is intentional.
version=${elasticsearch.version}
# Name of the plugin.
name=elasticsearch-merge-token-filter
# Fully qualified name of the plugin entry class.
# NOTE(review): MergeTokenFilter lives in package elasticsearch.merge; verify that this class
# actually exists under org.elasticsearch.plugin.analysis.starstory.
classname=org.elasticsearch.plugin.analysis.starstory.AnalysisMergeFilterPlugin
# Java version the plugin was built for (matches source/target 1.8 in pom.xml).
java.version=1.8
# Elasticsearch version the plugin was built against.
elasticsearch.version=${elasticsearch.version}
138 changes: 138 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-merge-token-filter</artifactId>
<version>6.1.0</version>

<packaging>jar</packaging>

<properties>
<lucene.version>7.2.1</lucene.version>
<elasticsearch.version>6.1.1</elasticsearch.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>

<dependencies>
<!-- Elasticsearch core, versioned through the elasticsearch.version property.
     The assembly descriptor excludes org.elasticsearch:elasticsearch from the plugin zip. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<scope>compile</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.elasticsearch.test/framework -->
<!-- NOTE(review): version is hard-coded to 6.1.1; it currently equals the elasticsearch.version
     property, so referencing the property would keep the two in step. -->
<dependency>
<groupId>org.elasticsearch.test</groupId>
<artifactId>framework</artifactId>
<version>6.1.1</version>
<scope>test</scope>
</dependency>

<!-- SLF4J API and the simple binding; both are named in the assembly descriptor includes. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.5</version>
</dependency>

<!-- NOTE(review): slf4j-simple is 1.6.4 while slf4j-api is 1.7.5; confirm the mismatch is intentional. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.6.4</version>
</dependency>

<!-- Lucene test utilities, versioned through the lucene.version property (7.2.1).
     NOTE(review): Elasticsearch 6.1.x is believed to build against Lucene 7.1.0; verify that
     lucene.version matches the Lucene version of the Elasticsearch dependency above. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>

<!-- NOTE(review): log4j-core is declared twice in this section: here with test scope and again
     at the end of the list without a scope. Maven warns about duplicate declarations; decide
     which scope is intended and remove the other entry. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
<scope>test</scope>
</dependency>


<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.2</version>
<scope>test</scope>
</dependency>

<!-- Second declaration of log4j-core, this time with the default scope (see the note on the
     first declaration). NOTE(review): with the default scope this jar is not excluded by the
     assembly descriptor and would presumably be bundled into the plugin zip; confirm. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>


</dependencies>

<build>
<!-- Test resources are read from src/test/resources under the project base directory. -->
<testResources>
<testResource>
<directory>${project.basedir}/src/test/resources</directory>
</testResource>
</testResources>
<plugins>
<!-- Compile for Java 8, matching java.version in plugin-descriptor.properties. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.2</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Builds the plugin distribution during the package phase using the descriptor at
     src/main/assemblies/plugin.xml; output goes to the releases folder of the build directory. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.3</version>
<configuration>
<outputDirectory>${project.build.directory}/releases/</outputDirectory>
<descriptors>
<descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Surefire and install plugins are declared only to pin their versions; no extra configuration. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.14</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-install-plugin</artifactId>
<version>2.4</version>
</plugin>
</plugins>
</build>

</project>
34 changes: 34 additions & 0 deletions src/main/assemblies/plugin.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
<?xml version="1.0"?>
<!-- Maven assembly descriptor for the plugin distribution: a zip without a base directory
     whose content is placed under an elasticsearch/ folder. -->
<assembly>
<id>plugin</id>
<formats>
<format>zip</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<!-- Project artifact plus its dependencies, except Elasticsearch itself. Transitive filtering
     applies the exclude pattern to the transitive path of each artifact as well. -->
<dependencySet>
<outputDirectory>elasticsearch/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<excludes>
<exclude>org.elasticsearch:elasticsearch</exclude>
</excludes>
</dependencySet>
<!-- Explicitly lists the SLF4J API and simple binding.
     NOTE(review): the first dependency set does not exclude these artifacts, so this set
     looks redundant; confirm whether it is still needed. -->
<dependencySet>
<outputDirectory>elasticsearch/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>org.slf4j:slf4j-api</include>
<include>org.slf4j:slf4j-simple</include>
</includes>
</dependencySet>
</dependencySets>
<files>
<!-- The plugin descriptor is copied with filtering enabled so that its property placeholders
     are replaced with values from the build. -->
<file>
<source>plugin-descriptor.properties</source>
<outputDirectory>elasticsearch/</outputDirectory>
<filtered>true</filtered>
</file>
</files>
</assembly>
64 changes: 64 additions & 0 deletions src/main/java/elasticsearch/merge/MergeTokenFilter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package elasticsearch.merge;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeSource;

import java.io.IOException;

/**
 * Token filter that merges consecutive tokens of the wrapped stream into a single token whose
 * term text is the original terms joined by a separator.
 *
 * <p>A token whose position increment is greater than the increment gap closes the group that
 * is being built and becomes the first term of the next group, so a stream that contains such
 * gaps yields one merged token per group. A stream without gaps yields exactly one token.
 *
 * <p>Only the term text of the emitted token is rewritten; every other attribute is left as
 * the last consumed input token set it.
 *
 * <p>Created by nobaksan on 2018. 4. 9..
 */
public class MergeTokenFilter extends TokenFilter {
    /** Separator used when the caller passes {@code null}. */
    private static final String DEFAULT_TOKEN_SEPARATOR = "_";
    /** Increment gap used by the two-argument constructor. */
    private static final int DEFAULT_INCREMENT_GAP = 100;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final String tokenSeparator;
    private final int incrementGap;
    /** Accumulates the term text of the group currently being merged. */
    private final StringBuilder builder = new StringBuilder();
    /** Captured attributes of the token that opened the next group; valid while {@link #recheckPrevious} is set. */
    private AttributeSource.State previousState = null;
    /** True when {@link #previousState} holds a token that still has to be emitted. */
    private boolean recheckPrevious = false;
    /** True once the wrapped stream has reported that it has no more tokens. */
    private boolean inputExhausted = false;

    /**
     * Creates a filter with the default increment gap of 100.
     *
     * @param input          stream whose tokens are merged
     * @param tokenSeparator text placed between merged terms; {@code null} selects {@code "_"}
     */
    public MergeTokenFilter(TokenStream input, String tokenSeparator)
    {
        this(input, tokenSeparator, DEFAULT_INCREMENT_GAP);
    }

    /**
     * Creates a filter with an explicit increment gap.
     *
     * @param input          stream whose tokens are merged
     * @param tokenSeparator text placed between merged terms; {@code null} selects {@code "_"}
     * @param incrementGap   largest position increment that still belongs to the current group;
     *                       a token with a larger increment starts a new group
     */
    public MergeTokenFilter(TokenStream input, String tokenSeparator, int incrementGap)
    {
        super(input);
        this.tokenSeparator = (tokenSeparator != null ? tokenSeparator : DEFAULT_TOKEN_SEPARATOR);
        this.incrementGap = incrementGap;
    }

    /**
     * Emits the next merged token.
     *
     * <p>Declared {@code final} because Lucene requires every concrete {@code TokenStream} to be
     * final or to have a final {@code incrementToken()}.
     *
     * @return {@code true} if a merged token was produced, {@code false} when the input is exhausted
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public final boolean incrementToken() throws IOException
    {
        builder.setLength(0);
        if (recheckPrevious)
        {
            // The token that closed the previous group opens this one.
            restoreState(previousState);
            builder.append(termAtt.buffer(), 0, termAtt.length());
            recheckPrevious = false;
            previousState = null;
        }
        // Never pull from the wrapped stream again once it has signalled its end.
        while (!inputExhausted)
        {
            if (!input.incrementToken())
            {
                inputExhausted = true;
                break;
            }
            boolean beyondGap = posIncrAtt.getPositionIncrement() > incrementGap;
            if (beyondGap && builder.length() > 0)
            {
                // Keep this token for the next call and stop here; without stopping, the rest of
                // the stream would be swallowed into the current group and this token dropped.
                previousState = captureState();
                recheckPrevious = true;
                break;
            }
            // A gap token that arrives while nothing is buffered simply starts the group.
            if (builder.length() > 0)
            {
                builder.append(tokenSeparator);
            }
            builder.append(termAtt.buffer(), 0, termAtt.length());
        }
        if (builder.length() == 0)
        {
            return false;
        }
        // A non-empty group is always emitted, including when another group is pending.
        termAtt.setEmpty().append(builder);
        return true;
    }

    /**
     * Resets the wrapped stream and clears all merge state so the filter can be reused.
     *
     * @throws IOException if the wrapped stream fails to reset
     */
    @Override
    public void reset() throws IOException
    {
        super.reset();
        builder.setLength(0);
        previousState = null;
        recheckPrevious = false;
        inputExhausted = false;
    }
}
Loading

0 comments on commit 308d801

Please sign in to comment.