Bootstrap

记录:使用 Jsoup 爬取 NVIDIA GPU 列表

1、导入Jsoup依赖

		<dependency>
			<groupId>org.jsoup</groupId>
			<artifactId>jsoup</artifactId>
			<version>1.10.2</version>
		</dependency>

2、上代码

package ai.dekube.clustermonitor.server.service;

import ai.dekube.clustermonitor.server.ServerApplication;
import ai.dekube.clustermonitor.server.repository.dao.NvidiaGPUDao;
import ai.dekube.clustermonitor.server.repository.model.NvidiaGPUEntity;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import javax.annotation.Resource;
import java.io.IOException;

/**
 * Crawls the NVIDIA GPU listing on techpowerup.com one release year at a time and
 * upserts every GPU it finds through {@link NvidiaGPUDao}.
 *
 * <p>For each listed GPU the summary columns are read from the listing table and the
 * FP16 / FP32 / BF16 figures are read from the GPU's own detail page. Rows or pages whose
 * markup does not have the expected shape are skipped instead of aborting the whole crawl.
 */
@RunWith(SpringRunner.class)
@SpringBootTest(classes = ServerApplication.class)
public class NavidiaServiceTest {

    private static final String BASE_URL = "https://www.techpowerup.com";
    private static final String LIST_URL = BASE_URL + "/gpu-specs/?mfgr=NVIDIA&sort=name&released=";

    /** First and last release year (inclusive) that are crawled. */
    private static final int FIRST_YEAR = 2014;
    private static final int LAST_YEAR = 2024;

    /**
     * Offset between the index of a "vendor-NVIDIA" element and the index of its table row.
     * NOTE(review): presumably the listing table starts with two header rows - confirm
     * against the live markup.
     */
    private static final int HEADER_ROWS = 2;

    /** Number of leading {@code <td>} cells read from each listing row. */
    private static final int LISTING_COLUMNS = 8;

    /** Pause after each GPU detail request; the site rate-limits by IP. */
    private static final long REQUEST_DELAY_MILLIS = 25000L;

    @Resource
    private NvidiaGPUDao nvidiaGPUDao;

    /**
     * Runs the crawl for every year in the configured range.
     *
     * @throws IOException          if a listing or detail page cannot be fetched
     * @throws InterruptedException if the thread is interrupted during the rate-limit pause
     */
    @Test
    public void poll() throws IOException, InterruptedException {
        for (int year = FIRST_YEAR; year <= LAST_YEAR; year++) {
            Document listing = Jsoup.connect(LIST_URL + year).get();

            Elements tableWrappers = listing.getElementsByClass("table-wrapper");
            if (tableWrappers.isEmpty()) {
                // No result table on this page; nothing to import for this year.
                continue;
            }
            Elements rows = tableWrappers.get(0).getElementsByTag("tr");
            Elements vendorElements = listing.getElementsByClass("vendor-NVIDIA");

            for (int i = 0; i < vendorElements.size(); i++) {
                int rowIndex = i + HEADER_ROWS;
                if (rowIndex >= rows.size()) {
                    // More vendor elements than table rows: the remaining ones have no row data.
                    break;
                }

                Elements links = vendorElements.get(i).getElementsByAttribute("href");
                Elements cells = rows.get(rowIndex).getElementsByTag("td");
                if (links.isEmpty() || cells.size() < LISTING_COLUMNS) {
                    // Skipped before any request is made, so no rate-limit pause is needed.
                    continue;
                }

                String detailUri = links.get(0).attr("href");
                Document detailPage = Jsoup.connect(BASE_URL + detailUri).get();
                Elements specRows = detailPage.getElementsByClass("clearfix");

                NvidiaGPUEntity entity = NvidiaGPUEntity.builder()
                        .productName(cells.get(0).text())
                        .gpuChip(cells.get(1).text())
                        .released(cells.get(2).text())
                        .bus(cells.get(3).text())
                        .memory(cells.get(4).text())
                        .gpuClock(cells.get(5).text())
                        .memoryClock(cells.get(6).text())
                        .shaders(cells.get(7).text())
                        .bf16(findSpec(specRows, "BF16"))
                        .fp16(findSpec(specRows, "FP16 (half)"))
                        .fp32(findSpec(specRows, "FP32 (float)"))
                        .build();

                upsert(entity);

                // The site rate-limits by IP, so pause between detail requests.
                Thread.sleep(REQUEST_DELAY_MILLIS);
            }
        }
    }

    /**
     * Looks up a spec value on a GPU detail page.
     *
     * @param specRows elements with class "clearfix" taken from the detail page
     * @param label    exact text of the {@code <dt>} to look for
     * @return text of the first {@code <dd>} inside the last element whose first
     *         {@code <dt>} equals {@code label}, or an empty string when none matches
     */
    private static String findSpec(Elements specRows, String label) {
        String value = "";
        for (Element specRow : specRows) {
            Elements terms = specRow.getElementsByTag("dt");
            Elements definitions = specRow.getElementsByTag("dd");
            if (terms.isEmpty() || definitions.isEmpty()) {
                // "clearfix" element without a dt/dd pair; it cannot hold a spec value.
                continue;
            }
            if (label.equals(terms.get(0).text())) {
                // Keep scanning so that the last match wins.
                value = definitions.get(0).text();
            }
        }
        return value;
    }

    /** Inserts the entity, or updates the stored row that has the same product name. */
    private void upsert(NvidiaGPUEntity entity) {
        NvidiaGPUEntity existing = nvidiaGPUDao.selectByName(entity.getProductName());
        if (existing == null) {
            nvidiaGPUDao.insert(entity);
        } else {
            entity.setId(existing.getId());
            nvidiaGPUDao.updateById(entity);
        }
    }
}

;