2024年09月17日 建站教程
nodejs如何利用crawler爬取数据在通过node-xlsx插件并生成Excel文件,下面web建站小编给大家详细介绍一下实现方法。
1、安装脚手架:
yarn add crawler
2、代码实现介绍:
const Crawler = require("crawler");
const fs = require("fs")
const { resolve } = require("path")
let crawler = new Crawler({
timeout: 10000,
jQuery: true,
});
let crawler = new Crawler({
timeout: 10000,
jQuery: true,
});
function getPokemon() {
let uri = "" // 宝可梦图鉴地址
let data = []
return new Promise((resolve, reject) => {
crawler.queue({
uri,
callback: (err, res, done) => {
if (err) reject(err);
let $ = res.$;
try {
let $tr = $(".roundy.eplist tr");
$tr.each((i, el) => {
let $td = $(el).find("td");
let _code = $td.eq(1).text().split("\n")[0]
let _name = $td.eq(3).text().split("\n")[0]
let _attr = $td.eq(4).text().split("\n")[0]
let _other = $td.eq(5).text().split("\n")[0]
_attr = _other.indexOf("属性") != -1 ? _attr : `${_attr}+${_other}`
if (_code) {
data.push([_code, _name, _attr])
}
})
done();
resolve(data)
} catch (err) {
done()
reject(err)
}
}
})
})
}
//备注:代码中引入了$,所以需要开启 jQuery 模式
1、安装脚手架:
yarn add node-xlsx
2、代码实现介绍:
const xlsx = require("node-xlsx")
getPokemon().then(async data => {
let title = ["编号", "宝可梦", "属性"]
let list = [{
name: "关都",
data: [
title,
...data
]
}];
const sheetOptions = { '!cols': [{ wch: 15 }, { wch: 20 }, { wch: 20 }] };
const buffer = await xlsx.build(list, { sheetOptions })
try {
await fs.writeFileSync(resolve(__dirname, "data/pokemon.xlsx"), buffer, "utf8")
} catch (error) { }
})
1、安装脚手架:
yarn add express
2、代码实现介绍:
const express = require("express")
const app = express();
const listenPort = 3000;
app.get("/pokemon",(req,res)=>{
let data = xlsx.parse(resolve(__dirname, "data/pokemon.xlsx"));
res.send(data)
})
app.listen(listenPort, () => {
console.log(`Server running at http://localhost:${listenPort}/`)
})
本文链接:http://so.lmcjl.com/news/13232/