- 정규식(Regular expression)을 이용하는 방법
- HTML Parser 를 이용하는 방법
여기에서는 오픈소스 기반의 HTML Parser인 jsoup을 이용하여 게시물의 본문에서 첫 번째 이미지 URL을 간단하게 추출하고자 한다.
package architecture.ee.web.community.announce.impl; import java.io.Serializable; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.select.Elements; import architecture.common.model.factory.ModelTypeFactory; import architecture.common.model.support.NoNamedEntityModelObjectSupport; import architecture.common.user.User; import architecture.ee.web.attachment.Attachment; import architecture.ee.web.community.announce.Announce; public class DefaultAnnounce extends NoNamedEntityModelObjectSupport implements Announce { private Long announceId; private int objectType ; private Long objectId; private Long userId ; private String subject ; private String body; private Date startDate; private Date endDate; private User user; private Listattachments; private String firstImageSrc ; private int imageCount = 0; /** 중략 **/ /** * @return body */ public String getBody() { return body; } /** * @param body 설정할 body */ public void setBody(String body) { this.body = body; Document doc = Jsoup.parse(this.body); Elements links = doc.select("img"); this.imageCount = links.size(); if( imageCount > 0 ) firstImageSrc = links.first().attr("src"); } public String getFirstImageSrc(){ return this.firstImageSrc; } /** * @return imageCount */ public int getImageCount() { return imageCount; } }
자세한 jsoup API 사용법은 jsoup 문서를 참고한다.
댓글 없음:
댓글 쓰기