-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add robots.txt, introduce first search draft
- Loading branch information
querwurzel
committed
Nov 12, 2023
1 parent
6a102e5
commit 339f48e
Showing
24 changed files
with
393 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
backend/src/main/java/com/github/binpastes/paste/business/fulltext/Indexer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package com.github.binpastes.paste.business.fulltext; | ||
|
||
import com.github.binpastes.paste.domain.Paste; | ||
import com.github.binpastes.paste.domain.PasteRepository; | ||
import com.github.binpastes.util.IdGenerator; | ||
import jakarta.annotation.PostConstruct; | ||
import org.apache.lucene.analysis.standard.StandardAnalyzer; | ||
import org.apache.lucene.document.*; | ||
import org.apache.lucene.index.*; | ||
import org.apache.lucene.queryparser.classic.ParseException; | ||
import org.apache.lucene.queryparser.classic.QueryParser; | ||
import org.apache.lucene.search.*; | ||
import org.apache.lucene.store.ByteBuffersDirectory; | ||
import org.apache.lucene.store.NIOFSDirectory; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.stereotype.Component; | ||
import reactor.core.publisher.Flux; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
@Component | ||
public class Indexer { | ||
|
||
private final ByteBuffersDirectory index = new ByteBuffersDirectory(); | ||
|
||
IndexWriter writer; | ||
|
||
private final PasteRepository pasteRepository; | ||
|
||
@Autowired | ||
public Indexer(PasteRepository pasteRepository) { | ||
this.pasteRepository = pasteRepository; | ||
} | ||
|
||
@PostConstruct | ||
private void postConstruct() { | ||
try { | ||
var analyzer = new StandardAnalyzer(); | ||
var config = new IndexWriterConfig(analyzer); | ||
writer = new IndexWriter(index, config); | ||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
|
||
} | ||
|
||
public void index(Paste paste) { | ||
try { | ||
if (paste.getTitle() == null && paste.isEncrypted()) { | ||
return; | ||
} | ||
|
||
Document document = new Document(); | ||
document.add(new StringField("id", paste.getId(), Field.Store.YES)); | ||
|
||
if (paste.getTitle() != null) { | ||
document.add(new TextField("title", paste.getTitle(), Field.Store.NO)); | ||
} | ||
|
||
if (!paste.isEncrypted()) { | ||
document.add(new TextField("content", paste.getContent(), Field.Store.NO)); | ||
} | ||
|
||
writer.addDocument(document); | ||
writer.flush(); | ||
writer.commit(); | ||
|
||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
|
||
public Flux<Paste> query(String term) { | ||
try { | ||
var indexSearcher = new IndexSearcher(DirectoryReader.open(index)); | ||
|
||
BooleanQuery booleanQuery | ||
= new BooleanQuery.Builder() | ||
.add(new FuzzyQuery(new Term("title", term)), BooleanClause.Occur.SHOULD) | ||
.add(new FuzzyQuery(new Term("content", term)), BooleanClause.Occur.SHOULD) | ||
.build(); | ||
|
||
TopDocs hits = indexSearcher.search(booleanQuery, 10); | ||
StoredFields storedFields = indexSearcher.storedFields(); | ||
|
||
List<Document> documents = new ArrayList<>(); | ||
for (ScoreDoc hit : hits.scoreDocs) { | ||
Document doc = storedFields.document(hit.doc); | ||
documents.add(doc); | ||
} | ||
|
||
System.out.println(hits.totalHits.value + " / " + indexSearcher.getIndexReader().maxDoc()); | ||
|
||
return Flux.fromIterable(documents) | ||
.map(doc -> doc.get("id")) | ||
.flatMap(pasteRepository::findOneLegitById); | ||
} catch (Exception e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<!-- ActiveMQ Artemis server configuration (validated against artemis-server.xsd). -->
<configuration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:activemq" xsi:schemaLocation="urn:activemq /schema/artemis-server.xsd"> | ||
<core xmlns="urn:activemq:core"> | ||
|
||
<!-- Message persistence is switched on. -->
<persistence-enabled>true</persistence-enabled> | ||
|
||
<!-- Broker security is switched off. -->
<security-enabled>false</security-enabled> | ||
|
||
<!-- The only acceptor is the in-VM one (vm://0); no network acceptor is declared here. -->
<acceptors> | ||
<acceptor name="in-vm">vm://0</acceptor> | ||
</acceptors> | ||
|
||
<!-- Bindings are kept under ./tracking, a relative path. NOTE(review): presumably resolved against the working directory; confirm. -->
<bindings-directory>./tracking</bindings-directory> | ||
|
||
<journal-pool-files>3</journal-pool-files> | ||
|
||
<!-- One address "binpastes" with a single durable anycast queue limited to one consumer. -->
<addresses> | ||
<address name="binpastes"> | ||
<anycast> | ||
<queue name="pasteTrackingQueue" max-consumers="1"> | ||
<durable>true</durable> | ||
</queue> | ||
</anycast> | ||
</address> | ||
</addresses> | ||
|
||
|
||
</core> | ||
</configuration> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Rule for all crawlers: do not crawl anything under /api/.
User-agent: * | ||
Disallow: /api/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.