Skip to content

Commit

Permalink
Merge pull request #21 from jisung-in/feature/17-book-crawling
Browse files Browse the repository at this point in the history
[Feature] Book 크롤링 기능 추가
  • Loading branch information
jwooo authored Mar 23, 2024
2 parents 7ade783 + 6a6eefc commit 513b449
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 0 deletions.
3 changes: 3 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ dependencies {
testImplementation 'org.springframework.restdocs:spring-restdocs-mockmvc'
// ARM Native Library Compatibility
runtimeOnly 'io.netty:netty-resolver-dns-native-macos:4.1.104.Final:osx-aarch_64'
// Jsoup Web Crawling Library
implementation 'org.jsoup:jsoup:1.16.2'

}

tasks.named('bootBuildImage') {
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Crawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.jisungin.infra.crawler;

/**
 * Entry point for crawling book data by ISBN.
 */
public interface Crawler {

/**
 * Crawls the data of the book identified by the given ISBN.
 *
 * @param isbn ISBN of the book to crawl
 * @return the crawled book data as a {@link CrawlingBook}
 */
CrawlingBook crawlBook(String isbn);

}
25 changes: 25 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/CrawlingBook.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.jisungin.infra.crawler;

import lombok.Builder;
import lombok.Getter;

/**
 * Immutable value object carrying the data crawled for a single book:
 * the cover image URL and the textual content.
 *
 * <p>Instances are created through {@link #of(String, String)} or the Lombok-generated builder;
 * the constructor itself is private.
 */
@Getter
public class CrawlingBook {

    // Both fields are assigned exactly once in the constructor and never mutated,
    // so they are declared final to make the immutability explicit.
    private final String imageUrl;
    private final String content;

    @Builder
    private CrawlingBook(String imageUrl, String content) {
        this.imageUrl = imageUrl;
        this.content = content;
    }

    /**
     * Creates a {@code CrawlingBook} from its two components.
     *
     * @param imageUrl URL of the book image
     * @param content  textual content of the book
     * @return a new {@code CrawlingBook} holding the given values
     */
    public static CrawlingBook of(String imageUrl, String content) {
        return CrawlingBook.builder()
                .imageUrl(imageUrl)
                .content(content)
                .build();
    }

}
10 changes: 10 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Fetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.jisungin.infra.crawler;

import org.jsoup.nodes.Document;

/**
 * Retrieves the documents a crawler needs, returned as jsoup {@link Document}s.
 */
public interface Fetcher {

/**
 * Fetches the document associated with the given ISBN.
 * {@code Yes24Crawler} passes the result to {@code Parser#parseIsbn} to obtain a book id.
 *
 * @param isbn ISBN to look up
 * @return the fetched document
 */
Document fetchIsbn(String isbn);
/**
 * Fetches the document for the book with the given id.
 * {@code Yes24Crawler} passes the result to {@code Parser#parseBook}.
 *
 * @param bookId id of the book to fetch
 * @return the fetched document
 */
Document fetchBook(String bookId);

}
10 changes: 10 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Parser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.jisungin.infra.crawler;

import org.jsoup.nodes.Document;

/**
 * Extracts crawler-relevant values from fetched jsoup {@link Document}s.
 */
public interface Parser {

/**
 * Extracts a string value from a document obtained via {@code Fetcher#fetchIsbn}.
 * {@code Yes24Crawler} uses the returned value as the book id passed to {@code Fetcher#fetchBook}.
 *
 * @param doc document to parse
 * @return the extracted value
 */
String parseIsbn(Document doc);
/**
 * Builds a {@link CrawlingBook} from a document obtained via {@code Fetcher#fetchBook}.
 *
 * @param doc document to parse
 * @return the parsed book data
 */
CrawlingBook parseBook(Document doc);

}
20 changes: 20 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package com.jisungin.infra.crawler;

import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Component;

/**
 * {@link Crawler} implementation that delegates all fetching to a {@link Fetcher}
 * and all extraction to a {@link Parser}.
 */
@Component
@RequiredArgsConstructor
public class Yes24Crawler implements Crawler {

    private final Fetcher fetcher;
    private final Parser parser;

    /**
     * Resolves the ISBN to a book id, then loads and parses that book.
     *
     * @param isbn ISBN of the book to crawl
     * @return the book data parsed from the fetched book document
     */
    @Override
    public CrawlingBook crawlBook(String isbn) {
        // Step 1: fetch the ISBN document and extract the book id from it.
        final String resolvedBookId = parser.parseIsbn(fetcher.fetchIsbn(isbn));

        // Step 2: fetch the book document for that id and parse it.
        final CrawlingBook crawled = parser.parseBook(fetcher.fetchBook(resolvedBookId));
        return crawled;
    }

}
23 changes: 23 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Yes24CrawlerConstant.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.jisungin.infra.crawler;

/**
 * URLs, request settings and CSS selectors used by the YES24 crawler,
 * plus helpers that build the request URLs.
 *
 * <p>This is a static-only holder: it is final and cannot be instantiated.
 */
public final class Yes24CrawlerConstant {

    public static final String BASE_URL = "https://www.yes24.com/Product";
    public static final String ISBN_URL = BASE_URL + "/Search?domain=BOOK&query=";
    public static final String BOOK_URL = BASE_URL + "/Goods/";
    public static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36";
    public static final String ISBN_CSS = "ul#yesSchList > li";
    public static final String ISBN_ATTR = "data-goods-no";
    public static final String BOOK_IMAGE_CSS = "span.gd_img > em.imgBdr > img.gImg";
    public static final String BOOK_IMAGE_ATTR = "src";
    public static final String BOOK_CONTENT_CSS = "div.infoWrap_txt > div.infoWrap_txtInner";

    private Yes24CrawlerConstant() {
        // Static-only holder; not meant to be instantiated.
    }

    /**
     * Builds the search URL for the given ISBN.
     *
     * <p>The value is URL-encoded so that characters with a meaning inside a query string
     * (for example {@code &}, {@code =}, {@code #} or spaces) cannot alter the request.
     * Purely alphanumeric ISBNs are left unchanged by the encoding.
     *
     * @param isbn ISBN to search for
     * @return {@link #ISBN_URL} followed by the encoded ISBN
     * @throws NullPointerException if {@code isbn} is {@code null}
     */
    public static String getIsbnUrl(String isbn) {
        // Fully-qualified names are used because this file has no import section.
        return ISBN_URL + java.net.URLEncoder.encode(isbn, java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Builds the book URL for the given book id.
     *
     * @param bookId book id to append to {@link #BOOK_URL}
     * @return {@link #BOOK_URL} followed by {@code bookId}
     */
    public static String getBookUrl(String bookId) {
        return BOOK_URL + bookId;
    }

}
41 changes: 41 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Yes24Fetcher.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package com.jisungin.infra.crawler;

import static com.jisungin.infra.crawler.Yes24CrawlerConstant.*;
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.USER_AGENT;

import com.jisungin.exception.BusinessException;
import com.jisungin.exception.ErrorCode;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.springframework.stereotype.Component;

/**
 * {@link Fetcher} implementation that downloads YES24 pages with jsoup.
 *
 * <p>Every failure, including missing input, is reported as a
 * {@link BusinessException} carrying {@link ErrorCode#BOOK_NOT_FOUND}.
 */
@Component
public class Yes24Fetcher implements Fetcher {

    /** Request timeout passed to jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /**
     * Fetches the search page for the given ISBN.
     *
     * @param isbn ISBN to search for
     * @return the downloaded search page
     * @throws BusinessException with {@code BOOK_NOT_FOUND} if {@code isbn} is null or blank,
     *                           or if the page cannot be downloaded
     */
    @Override
    public Document fetchIsbn(String isbn) {
        requirePresent(isbn);
        return fetch(getIsbnUrl(isbn));
    }

    /**
     * Fetches the page of the book with the given id.
     *
     * @param bookId id of the book to download
     * @return the downloaded book page
     * @throws BusinessException with {@code BOOK_NOT_FOUND} if {@code bookId} is null or blank,
     *                           or if the page cannot be downloaded
     */
    @Override
    public Document fetchBook(String bookId) {
        // A blank id would otherwise produce a request to the bare ".../Goods/" URL,
        // which cannot identify a book.
        requirePresent(bookId);
        return fetch(getBookUrl(bookId));
    }

    /** Rejects null or blank identifiers before any network request is made. */
    private static void requirePresent(String value) {
        if (value == null || value.isBlank()) {
            throw new BusinessException(ErrorCode.BOOK_NOT_FOUND);
        }
    }

    /** Downloads the given URL with the shared timeout and user agent settings. */
    private Document fetch(String url) {
        try {
            return Jsoup.connect(url)
                    .timeout(TIMEOUT_MILLIS)
                    .userAgent(USER_AGENT)
                    .ignoreContentType(true)
                    .get();
        } catch (Exception e) {
            // Deliberately broad: any failure while connecting or reading is mapped to the same
            // exception, so callers only ever see BusinessException.
            // NOTE(review): the cause is dropped because BusinessException's constructors are not
            // visible here; consider attaching or logging `e` if the type supports it.
            throw new BusinessException(ErrorCode.BOOK_NOT_FOUND);
        }
    }

}
29 changes: 29 additions & 0 deletions src/main/java/com/jisungin/infra/crawler/Yes24Parser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.jisungin.infra.crawler;

import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_CONTENT_CSS;
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_IMAGE_ATTR;
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_IMAGE_CSS;
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.ISBN_ATTR;
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.ISBN_CSS;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.safety.Safelist;
import org.springframework.stereotype.Component;

/**
 * {@link Parser} implementation that reads values out of YES24 pages
 * using the selectors defined in {@code Yes24CrawlerConstant}.
 */
@Component
public class Yes24Parser implements Parser {

    /**
     * Reads the {@code ISBN_ATTR} attribute from the elements matching {@code ISBN_CSS}.
     *
     * @param doc document to read from
     * @return the attribute value as returned by jsoup's {@code Elements.attr}
     */
    @Override
    public String parseIsbn(Document doc) {
        String goodsNo = doc.select(ISBN_CSS).attr(ISBN_ATTR);
        return goodsNo;
    }

    /**
     * Reads the image attribute and the content text from the document and wraps them
     * in a {@link CrawlingBook}.
     *
     * @param doc document to read from
     * @return a {@code CrawlingBook} built from the extracted image value and cleaned content
     */
    @Override
    public CrawlingBook parseBook(Document doc) {
        // Content: take the combined text of the matching elements, then pass it through
        // Jsoup.clean with an empty safelist.
        String rawText = doc.select(BOOK_CONTENT_CSS).text();
        String cleanedContent = Jsoup.clean(rawText, Safelist.none());

        // Image: value of the configured attribute on the matching elements.
        String coverImage = doc.select(BOOK_IMAGE_CSS).attr(BOOK_IMAGE_ATTR);

        return CrawlingBook.of(coverImage, cleanedContent);
    }

}
72 changes: 72 additions & 0 deletions src/test/java/com/jisungin/infra/Yes24CrawlerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package com.jisungin.infra;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.jisungin.exception.BusinessException;
import com.jisungin.exception.ErrorCode;
import com.jisungin.infra.crawler.CrawlingBook;
import com.jisungin.infra.crawler.Yes24Crawler;
import com.jisungin.infra.crawler.Yes24Fetcher;
import com.jisungin.infra.crawler.Yes24Parser;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

/**
 * Unit tests for {@link Yes24Crawler} with its fetcher and parser replaced by Mockito mocks.
 */
@ExtendWith(MockitoExtension.class)
public class Yes24CrawlerTest {

    @InjectMocks
    private Yes24Crawler crawler;

    @Mock
    private Yes24Parser parser;

    @Mock
    private Yes24Fetcher fetcher;

    /** The crawler returns whatever the parser produces for the fetched book document. */
    @Test
    @DisplayName("isbn을 통해 크롤링 된 책을 생성한다.")
    public void crawlingBook() {
        // given
        String isbn = "0000000000";
        String bookId = "1111111111";
        Document searchPage = mock(Document.class);
        Document detailPage = mock(Document.class);
        CrawlingBook stubbedBook = CrawlingBook.of("image url link", "crawling content");

        // Stubs are listed in the order the crawler invokes them.
        when(fetcher.fetchIsbn(isbn)).thenReturn(searchPage);
        when(parser.parseIsbn(searchPage)).thenReturn(bookId);
        when(fetcher.fetchBook(bookId)).thenReturn(detailPage);
        when(parser.parseBook(detailPage)).thenReturn(stubbedBook);

        // when
        CrawlingBook actual = crawler.crawlBook(isbn);

        // then
        assertThat(actual).isEqualTo(stubbedBook);
    }

    /** An exception raised by the fetcher propagates out of the crawler unchanged. */
    @Test
    @DisplayName("올바르지 않은 isbn을 입력하면 예외가 발생한다.")
    public void crawlingBookWithInvalidIsbn() {
        // given
        String isbn = "XXXXXXXXXX";
        BusinessException notFound = new BusinessException(ErrorCode.BOOK_NOT_FOUND);

        when(fetcher.fetchIsbn(isbn)).thenThrow(notFound);

        // when then
        assertThatThrownBy(() -> crawler.crawlBook(isbn))
                .isInstanceOf(BusinessException.class)
                .hasMessage("책을 찾을 수 없습니다.");
    }

}

0 comments on commit 513b449

Please sign in to comment.