-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from jisung-in/feature/17-book-crawling
[Feature] Book 크롤링 기능 추가
- Loading branch information
Showing
10 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
public interface Crawler { | ||
|
||
CrawlingBook crawlBook(String isbn); | ||
|
||
} |
25 changes: 25 additions & 0 deletions
25
src/main/java/com/jisungin/infra/crawler/CrawlingBook.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import lombok.Builder; | ||
import lombok.Getter; | ||
|
||
@Getter | ||
public class CrawlingBook { | ||
|
||
private String imageUrl; | ||
private String content; | ||
|
||
@Builder | ||
private CrawlingBook(String imageUrl, String content) { | ||
this.imageUrl = imageUrl; | ||
this.content = content; | ||
} | ||
|
||
public static CrawlingBook of(String imageUrl, String content) { | ||
return CrawlingBook.builder() | ||
.imageUrl(imageUrl) | ||
.content(content) | ||
.build(); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import org.jsoup.nodes.Document; | ||
|
||
public interface Fetcher { | ||
|
||
Document fetchIsbn(String isbn); | ||
Document fetchBook(String bookId); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import org.jsoup.nodes.Document; | ||
|
||
public interface Parser { | ||
|
||
String parseIsbn(Document doc); | ||
CrawlingBook parseBook(Document doc); | ||
|
||
} |
20 changes: 20 additions & 0 deletions
20
src/main/java/com/jisungin/infra/crawler/Yes24Crawler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import lombok.RequiredArgsConstructor; | ||
import org.springframework.stereotype.Component; | ||
|
||
@Component | ||
@RequiredArgsConstructor | ||
public class Yes24Crawler implements Crawler { | ||
|
||
private final Fetcher fetcher; | ||
private final Parser parser; | ||
|
||
@Override | ||
public CrawlingBook crawlBook(String isbn) { | ||
String bookId = parser.parseIsbn(fetcher.fetchIsbn(isbn)); | ||
|
||
return parser.parseBook(fetcher.fetchBook(bookId)); | ||
} | ||
|
||
} |
23 changes: 23 additions & 0 deletions
23
src/main/java/com/jisungin/infra/crawler/Yes24CrawlerConstant.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
/**
 * URLs, request header value, CSS selectors and attribute names used by the Yes24 crawler,
 * plus helpers that build request URLs from them.
 *
 * <p>This is a static-only holder and cannot be instantiated.
 */
public final class Yes24CrawlerConstant {

    public static final String BASE_URL = "https://www.yes24.com/Product";
    public static final String ISBN_URL = BASE_URL + "/Search?domain=BOOK&query=";
    public static final String BOOK_URL = BASE_URL + "/Goods/";
    public static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36";
    public static final String ISBN_CSS = "ul#yesSchList > li";
    public static final String ISBN_ATTR = "data-goods-no";
    public static final String BOOK_IMAGE_CSS = "span.gd_img > em.imgBdr > img.gImg";
    public static final String BOOK_IMAGE_ATTR = "src";
    public static final String BOOK_CONTENT_CSS = "div.infoWrap_txt > div.infoWrap_txtInner";

    // Static-only holder: prevent instantiation.
    private Yes24CrawlerConstant() {
    }

    /**
     * Builds the search URL for the given ISBN.
     *
     * <p>The ISBN is form-encoded before being appended as the {@code query} parameter, so
     * characters such as {@code &}, {@code =} or spaces cannot alter the query string.
     * Values made only of letters and digits are appended unchanged.
     *
     * @param isbn the ISBN to search for; must not be {@code null}
     * @return {@link #ISBN_URL} followed by the encoded ISBN
     * @throws NullPointerException if {@code isbn} is {@code null}
     */
    public static String getIsbnUrl(String isbn) {
        java.util.Objects.requireNonNull(isbn, "isbn");
        // Fully qualified names are used so the file needs no additional imports.
        return ISBN_URL + java.net.URLEncoder.encode(isbn, java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Builds the book page URL for the given book id.
     *
     * @param bookId the id appended to {@link #BOOK_URL}
     * @return {@link #BOOK_URL} followed by {@code bookId}
     */
    public static String getBookUrl(String bookId) {
        return BOOK_URL + bookId;
    }

}
41 changes: 41 additions & 0 deletions
41
src/main/java/com/jisungin/infra/crawler/Yes24Fetcher.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.*; | ||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.USER_AGENT; | ||
|
||
import com.jisungin.exception.BusinessException; | ||
import com.jisungin.exception.ErrorCode; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.springframework.stereotype.Component; | ||
|
||
@Component | ||
public class Yes24Fetcher implements Fetcher { | ||
|
||
@Override | ||
public Document fetchIsbn(String isbn) { | ||
try { | ||
return Jsoup.connect(getIsbnUrl(isbn)) | ||
.timeout(5000) | ||
.userAgent(USER_AGENT) | ||
.ignoreContentType(true) | ||
.get(); | ||
} catch (Exception e) { | ||
throw new BusinessException(ErrorCode.BOOK_NOT_FOUND); | ||
} | ||
} | ||
|
||
@Override | ||
public Document fetchBook(String bookId) { | ||
try { | ||
return Jsoup.connect(getBookUrl(bookId)) | ||
.timeout(5000) | ||
.userAgent(USER_AGENT) | ||
.ignoreContentType(true) | ||
.get(); | ||
} catch (Exception e) { | ||
throw new BusinessException(ErrorCode.BOOK_NOT_FOUND); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package com.jisungin.infra.crawler; | ||
|
||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_CONTENT_CSS; | ||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_IMAGE_ATTR; | ||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.BOOK_IMAGE_CSS; | ||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.ISBN_ATTR; | ||
import static com.jisungin.infra.crawler.Yes24CrawlerConstant.ISBN_CSS; | ||
|
||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.safety.Safelist; | ||
import org.springframework.stereotype.Component; | ||
|
||
@Component | ||
public class Yes24Parser implements Parser { | ||
@Override | ||
public String parseIsbn(Document doc) { | ||
return doc.select(ISBN_CSS).attr(ISBN_ATTR); | ||
} | ||
|
||
@Override | ||
public CrawlingBook parseBook(Document doc) { | ||
String image = doc.select(BOOK_IMAGE_CSS).attr(BOOK_IMAGE_ATTR); | ||
String content = Jsoup.clean(doc.select(BOOK_CONTENT_CSS).text(), Safelist.none()); | ||
|
||
return CrawlingBook.of(image, content); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
package com.jisungin.infra; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
import static org.assertj.core.api.Assertions.assertThatThrownBy; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
import com.jisungin.exception.BusinessException; | ||
import com.jisungin.exception.ErrorCode; | ||
import com.jisungin.infra.crawler.CrawlingBook; | ||
import com.jisungin.infra.crawler.Yes24Crawler; | ||
import com.jisungin.infra.crawler.Yes24Fetcher; | ||
import com.jisungin.infra.crawler.Yes24Parser; | ||
import org.jsoup.nodes.Document; | ||
import org.junit.jupiter.api.DisplayName; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.mockito.InjectMocks; | ||
import org.mockito.Mock; | ||
import org.mockito.junit.jupiter.MockitoExtension; | ||
|
||
@ExtendWith(MockitoExtension.class) | ||
public class Yes24CrawlerTest { | ||
|
||
@InjectMocks | ||
private Yes24Crawler crawler; | ||
|
||
@Mock | ||
private Yes24Parser parser; | ||
|
||
@Mock | ||
private Yes24Fetcher fetcher; | ||
|
||
@Test | ||
@DisplayName("isbn을 통해 크롤링 된 책을 생성한다.") | ||
public void crawlingBook() { | ||
// given | ||
String isbn = "0000000000"; | ||
String bookId = "1111111111"; | ||
|
||
Document isbnDocument = mock(Document.class); | ||
Document bookDocument = mock(Document.class); | ||
|
||
CrawlingBook crawlingBook = CrawlingBook.of("image url link", "crawling content"); | ||
|
||
when(fetcher.fetchIsbn(isbn)).thenReturn(isbnDocument); | ||
when(fetcher.fetchBook(bookId)).thenReturn(bookDocument); | ||
when(parser.parseIsbn(isbnDocument)).thenReturn(bookId); | ||
when(parser.parseBook(bookDocument)).thenReturn(crawlingBook); | ||
|
||
// when | ||
CrawlingBook expectedCrawlingBook = crawler.crawlBook(isbn); | ||
|
||
// then | ||
assertThat(expectedCrawlingBook).isEqualTo(crawlingBook); | ||
} | ||
|
||
@Test | ||
@DisplayName("올바르지 않은 isbn을 입력하면 예외가 발생한다.") | ||
public void crawlingBookWithInvalidIsbn() { | ||
// given | ||
String isbn = "XXXXXXXXXX"; | ||
|
||
when(fetcher.fetchIsbn(isbn)).thenThrow(new BusinessException(ErrorCode.BOOK_NOT_FOUND)); | ||
|
||
// when then | ||
assertThatThrownBy(() -> crawler.crawlBook(isbn)) | ||
.isInstanceOf(BusinessException.class) | ||
.hasMessage("책을 찾을 수 없습니다."); | ||
} | ||
|
||
} |