java抓取网站图片
//抓取网站图片 public class CatchWebImg{ //网站域名 static String domainUrl; public static void main(String[] args) throws Exception{ String Content = new String(); String nextUrl = null; domainUrl = ""; //MyUrl类抓根据URL抓取网站源代码 MyUrl listObj = new MyUrl(""); //设置抓取返回编码格式 listObj.setEncoding("utf-8"); //目标Url正则 listObj.setTargetRegex(""); //下一页正则 listObj.setNextList(""); while(true){ Content = listObj.getUrlContent(); //MyUrl类返回正则匹配到的网址 String[] urls = listObj.getTargetUrls(Content).substring(1).split(","); for(int i = 0; i < urls.length; i++){ System.out.println(urls[i]); //访问匹配到的网址 Thread td = new Thread(new SonFile(domainUrl + urls[i])); td.start(); Thread.sleep(3000); } //正则获取翻页网址 nextUrl = listObj.getNextListUrl(Content); if(nextUrl == null){ break; } //此处根据正则进行填写 nextUrl = domainUrl + nextUrl.replace("", ""); listObj.setTargetUrl(nextUrl); Thread.sleep(8 * 1000); } } } class SonFile implements Runnable{ private String tmpURL; //线程池,抓取某一链接下相关图片的线程 private static ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10); public SonFile(String url){ this.tmpURL = url; } @Override public void run() { // TODO Auto-generated method stub MyUrl obj = new MyUrl(this.tmpURL); //返回网页编码 obj.setEncoding("utf-8"); //图片标题 obj.setTitleRegex(""); //图片URL正则 obj.setTargetRegex(""); String[] urls; try { String Content = obj.getUrlContent(); String title = obj.getBodyTitle(Content); try { title = title.substring( 0, title.indexOf(" ")); urls = obj.getTargetUrls(Content).substring(1).split(","); System.out.println(title); for(int i = 0; i < urls.length; i += 2){ Thread.sleep(600); //抓取图片线程 MyTask td = new MyTask(title, urls[i], i / 2); executor.execute(td); } } catch (Exception e) { title = "Error"; } } catch (Exception e) { e.printStackTrace(); } } } class MyTask implements Runnable{ private String tmpURL; private String tmpDirName; private int tmpIndex; public MyTask(String dirName, String url, int index){ this.tmpDirName = dirName; this.tmpURL = url; this.tmpIndex = index; } @Override public void run() { try { this.download(); } catch (Exception e) { e.printStackTrace(); } } //保存图片到本地 public void download(){ URL url; byte[] bs = new byte[4096]; int len; String Path = ""; File dirFile = new File(Path + this.tmpDirName); if(!dirFile.exists()){ dirFile.mkdir(); } try { url = new URL(this.tmpURL); URLConnection con = url.openConnection(); con.setConnectTimeout(8 * 1000); InputStream is = con.getInputStream(); OutputStream os = new FileOutputStream(dirFile.getPath().trim() + "\\" + this.tmpIndex + ".jpg"); while ((len = is.read(bs)) != - 1) { os.write(bs, 0, len); } os.close(); is.close(); } catch (Exception e) { System.err.println("********* " + e.toString() + " *********"); } } }
相关推荐
-
RedisShardPoolUtil java
2019-1-13
-
微信api操作工具类 java
2019-1-8
-
java 跨数据库导入大数据 java
2019-1-8
-
SpringBoot中使用Websocket进行消息推送 java
2019-1-7
-
JAVA实现RSA加密,非对称加密算法 java
2019-1-8
-
JAVA身份证工具类 java
2019-1-8
-
java生成图片验证码 java
2019-1-8
-
QRCodeGenerator.java java
2019-1-8
-
java 微信开发 常用工具类(xml传输和解析 json转换对象) java
2019-1-8
-
SpringMvc执行流程 java
2019-1-8