Jsoup – Get favicon from an HTML page
By: Roy.Liu · Last updated: 2019-08-18
There are several ways a web browser can recognize a favicon:
Example 1
<head> <link rel="icon" href="http://example.com/image.ico" /> </head>
Example 2
<head> <link rel="icon" href="http://example.com/image.png" /> </head>
Example 3 – unusual, but Google uses it.
<head> <meta content="/images/google_favicon_128.png" itemprop="image" /> </head>
1. Jsoup Example
Code snippets to get the favicons shown above with Jsoup.
// Examples 1 and 2: take the first <link> inside <head> whose href matches
// the regular expression .*\.(ico|png).
Element element = doc.head().select("link[href~=.*\\.(ico|png)]").first();
// first() yields null when the selector matched nothing, so guard before
// reading the attribute to avoid a NullPointerException.
if (element != null) {
    System.out.println(element.attr("href"));
}
// Example 3: take the first <meta itemprop="image"> inside <head>; the icon
// path is carried in its "content" attribute.
Element element = doc.head().select("meta[itemprop=image]").first();
// first() yields null when the selector matched nothing, so guard before
// reading the attribute to avoid a NullPointerException.
if (element != null) {
    System.out.println(element.attr("content"));
}
2. Unit Test
Unit test example.
package com.webmitta.analysis; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.testng.Assert; import org.testng.annotations.Test; @Test public class TestFavIcon { Document document; private String PageProvider1() { StringBuffer html = new StringBuffer(); html.append("<!DOCTYPE html>"); html.append("<html lang=\"en\">"); html.append("<head>"); html.append("<link rel=\"icon\" href=\"http://example.com/image.ico\" />"); html.append("</head>"); html.append("<body>"); html.append("something"); html.append("</body>"); html.append("</html>"); return html.toString(); private String PageProvider2() { StringBuffer html = new StringBuffer(); html.append("<!DOCTYPE html>"); html.append("<html lang=\"en\">"); html.append("<head>"); html.append("<meta content=\"/images/google_favicon_128.png\" itemprop=\"image\">"); html.append("</head>"); html.append("<body>"); html.append("something"); html.append("</body>"); html.append("</html>"); return html.toString(); @Test void test_FavIcon1() { Document doc = Jsoup.parse(PageProvider1()); Element element = doc.head().select("link[href~=.*\\.(ico|png)]").first(); Assert.assertEquals(element.attr("href"), "http://example.com/image.ico"); @Test void test_FavIcon2() { Document doc = Jsoup.parse(PageProvider2()); Element element = doc.head().select("meta[itemprop=image]").first(); Assert.assertEquals(element.attr("content"), "/images/google_favicon_128.png");
Output
[TestNG] Running: C:\Users\mkyong2002\AppData\Local\Temp\testng-eclipse--1846902117\testng-customsuite.xml PASSED: test_FavIcon1 PASSED: test_FavIcon2
References
From:一号门
Previous:Java find location using Ip Address
COMMENTS