반응형

# Jsoup API 활용 (Jsoup API 이용한 크롤링(리소스 다운로드))

  • 웹 페이지에 존재하는 리소스의 종류는 다양함 (파일, 영상, 이미지 등)
  • 리소스마다 별도의 스레드를 생성하여 다운로드를 진행함.
  • 리소스 크롤링의 경우 저작권 등 주의.
  • 오디오(mp3), 이미지 크롤링 구현 로직
import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.jsoup.Jsoup;
import org.jsoup.select.Elements;

import kr.inflearn.DownloadBroker;

import org.jsoup.nodes.*;

/**
 * Demo that fetches two pages with Jsoup and downloads one resource from each
 * (an audio file and an image), each on its own thread via {@link DownloadBroker}.
 */
public class Project02_C {
	/**
	 * Fetches both pages, starts a download for the first matching element of
	 * each, then prints a ten-second countdown while the downloads run.
	 *
	 * <p>Each page is handled in its own try block so that a failure on one page
	 * (for example the placeholder address in {@code url1}) does not prevent the
	 * other download from starting.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String url1 = "사이트 주소 입력";
		String url2 = "http://ncov.mohw.go.kr/";

		int started = 0;

		// Resource download.
		// 1. mp3
		try {
			// NOTE(review): the page is requested with POST, as in the original; confirm the
			// target site does not require GET.
			Document doc1 = Jsoup.connect(url1).post();
			Element source1 = doc1.select("tr .list").first();
			System.out.println(source1);
			if (startDownload(source1)) {
				started++;
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		// 2. image
		try {
			Document doc2 = Jsoup.connect(url2).post();
			Element source2 = doc2.select("img").first();
			if (startDownload(source2)) {
				started++;
			}
		} catch (Exception e) {
			e.printStackTrace();
		}

		if (started == 0) {
			// Nothing is downloading, so there is nothing to wait for.
			return;
		}

		for (int i = 0; i < 10; i++) {
			try {
				Thread.sleep(1000);	// 1 second = 1000 ms
			} catch (InterruptedException e) {
				// Restore the interrupt flag and stop counting instead of swallowing it.
				Thread.currentThread().interrupt();
				break;
			}
			System.out.println(" " + (i+1));
		}
		System.out.println();
		System.out.println("================================");
	}

	/**
	 * Starts a background download of the resource referenced by the element's
	 * {@code src} attribute.
	 *
	 * <p>The URL is resolved with {@code absUrl} so that relative, root-relative
	 * and already-absolute {@code src} values all produce a usable address. The
	 * local file name is the part of the URL after the last {@code '/'}.
	 *
	 * @param source element carrying the {@code src} attribute; may be {@code null}
	 *               when the selector matched nothing
	 * @return {@code true} if a download thread was started, {@code false} if the
	 *         element, its URL, or the derived file name was missing
	 */
	private static boolean startDownload(Element source) {
		if (source == null) {
			System.err.println("No matching element found; skipping download.");
			return false;
		}
		String resourceUrl = source.absUrl("src").trim();
		if (resourceUrl.isEmpty()) {
			System.err.println("Element has no resolvable src attribute; skipping download.");
			return false;
		}
		System.out.println(resourceUrl);
		String fileName = resourceUrl.substring(resourceUrl.lastIndexOf("/")+1);
		if (fileName.isEmpty()) {
			System.err.println("Cannot derive a file name from " + resourceUrl + "; skipping download.");
			return false;
		}
		Runnable task = new DownloadBroker(resourceUrl, fileName);
		new Thread(task).start();
		return true;
	}
}
package kr.inflearn;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.URL;

/**
 * Runnable that downloads the content at a URL into a local file.
 *
 * <p>Failures are reported on standard error via a stack trace and are not
 * propagated to the thread that runs this task.
 */
public class DownloadBroker implements Runnable {
	private final String address;
	private final String fileName;

	/**
	 * Creates a download task whose target file name is the part of the address
	 * after the last {@code '/'}.
	 *
	 * @param address URL of the resource to download
	 */
	public DownloadBroker(String address) {
		this(address, deriveFileName(address));
	}

	/**
	 * Creates a download task that writes to the given file.
	 *
	 * @param address  URL of the resource to download
	 * @param fileName path of the file to write
	 */
	public DownloadBroker(String address, String fileName) {
		this.address = address;
		this.fileName = fileName;
	}

	/**
	 * Downloads the resource and prints a completion message followed by the
	 * file name. Any failure is printed as a stack trace.
	 */
	@Override
	public void run() {
		if (address == null || fileName == null || fileName.isEmpty()) {
			System.err.println("Cannot download: address=" + address + ", fileName=" + fileName);
			return;
		}
		try {
			copyToFile();
			// Printed only after both streams were closed successfully.
			System.out.println("다운로드 완료...");
			System.out.println(fileName);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Copies the remote content to the target file, closing both streams even
	 * when reading or writing fails.
	 *
	 * @throws java.io.IOException if the URL is malformed, cannot be opened, or
	 *                             the file cannot be written
	 */
	private void copyToFile() throws java.io.IOException {
		// The source is opened first so that an unreachable URL does not leave an
		// empty file behind.
		try (InputStream input = new URL(address).openStream();
				FileOutputStream output = new FileOutputStream(fileName)) {
			byte[] buffer = new byte[8192];
			int read;
			while ((read = input.read(buffer)) != -1) {
				output.write(buffer, 0, read);
			}
		}
	}

	/** Returns the text after the last {@code '/'} of the address, or {@code null} for a null address. */
	private static String deriveFileName(String address) {
		if (address == null) {
			return null;
		}
		return address.substring(address.lastIndexOf('/') + 1);
	}
}
반응형

+ Recent posts