- 정규식(Regular expression)을 이용하는 방법
- HTML Parser 를 이용하는 방법
여기에서는 오픈소스 기반의 HTML Parser인 jsoup을 이용하여 게시물의 본문에서 첫 번째 이미지 URL을 간단하게 추출하고자 한다.
package architecture.ee.web.community.announce.impl;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import architecture.common.model.factory.ModelTypeFactory;
import architecture.common.model.support.NoNamedEntityModelObjectSupport;
import architecture.common.user.User;
import architecture.ee.web.attachment.Attachment;
import architecture.ee.web.community.announce.Announce;
/**
 * Announcement model whose body is HTML. Whenever the body is set, the
 * number of {@code <img>} elements and the {@code src} of the first one are
 * derived from it with jsoup and cached on the instance.
 */
public class DefaultAnnounce extends NoNamedEntityModelObjectSupport implements Announce {

    private Long announceId;
    private int objectType;
    private Long objectId;
    private Long userId;
    private String subject;
    private String body;
    private Date startDate;
    private Date endDate;
    private User user;
    private List attachments;

    /** {@code src} of the first {@code <img>} in the body; {@code null} when the body has no image. */
    private String firstImageSrc;

    /** Number of {@code <img>} elements found in the body. */
    private int imageCount = 0;

    /* ... remaining members omitted in the original listing ... */

    /**
     * @return the HTML body, or {@code null} if none has been set
     */
    public String getBody() {
        return body;
    }

    /**
     * Stores the HTML body and recomputes the image metadata derived from it.
     *
     * <p>The derived values are always reset first, so replacing a body that
     * contained images with one that does not (or with {@code null}) never
     * leaves a stale first-image URL behind.
     *
     * @param body the HTML body to set; may be {@code null} or empty, in
     *             which case the image count is 0 and the first image source
     *             is {@code null}
     */
    public void setBody(String body) {
        this.body = body;

        // Reset derived state before parsing so values from a previous body
        // cannot leak through when the new body has no images.
        this.firstImageSrc = null;
        this.imageCount = 0;

        // Jsoup.parse does not accept null input; nothing to extract anyway.
        if (body == null || body.isEmpty()) {
            return;
        }

        Document doc = Jsoup.parse(body);
        Elements images = doc.select("img");
        this.imageCount = images.size();
        if (this.imageCount > 0) {
            // attr() yields an empty string when the element has no src attribute.
            this.firstImageSrc = images.first().attr("src");
        }
    }

    /**
     * @return the {@code src} attribute of the first {@code <img>} in the
     *         body, or {@code null} when the body contains no image
     */
    public String getFirstImageSrc() {
        return this.firstImageSrc;
    }

    /**
     * @return the number of {@code <img>} elements in the body
     */
    public int getImageCount() {
        return imageCount;
    }
}
자세한 jsoup API 사용법은 jsoup 공식 문서를 참고하기 바란다.
댓글 없음:
댓글 쓰기