let axios = require("axios")
let cheerio = require("cheerio")
let fs = require("fs")
// Kick off the crawl. `run` is async, so a failure (network error, unexpected
// page structure, ...) surfaces as a rejected promise; report it and mark the
// process as failed instead of leaving the rejection unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
/**
 * Appends a fixed sample string to the output file.
 *
 * The write is asynchronous and fire-and-forget: this function returns
 * immediately. A failed write is reported on stderr rather than thrown.
 */
function test() {
  fs.writeFile("/home/xys/fuck.txt", "asdasdasdda", { flag: "a+" }, (err) => {
    if (err) {
      // Previously the error was dropped silently; surface it without throwing.
      console.error(err);
    }
  });
}
/**
 * Crawls a chain of pages, starting from the first page URI and following the
 * URI returned by each `getUrl` call until a falsy value comes back.
 *
 * @returns {Promise<void>} Resolves once the last page has been processed.
 */
async function run() {
  const baseUrl = "https://XXX";
  // URI of the page to fetch next; starts at the first page.
  let nextURI = "XXXX";
  do {
    // Pages are fetched strictly one after another: each result names the next page.
    nextURI = await getUrl(baseUrl, nextURI);
  } while (nextURI);
}
/**
 * Downloads one page, appends its text to the output file and reports where
 * the next page is.
 *
 * The text is taken from the text nodes that are direct children of the first
 * element under `#contents`; each one is written followed by a newline.
 *
 * @param {string} baseUrl - Prefix that `temURI` is appended to.
 * @param {string} temURI - URI of the page to fetch (concatenated onto `baseUrl` as-is).
 * @returns {Promise<string|undefined>} The `href` of the first `.pages .next`
 *   element, or `""` when the page has no such element. A falsy value tells
 *   the caller to stop.
 */
async function getUrl(baseUrl, temURI) {
  const resp = await axios.get(baseUrl + temURI);
  const $ = cheerio.load(resp.data);

  // Tolerate pages without the expected container instead of throwing a TypeError.
  const container = $("#contents").children()[0];
  const nodes = container && container.children ? container.children : [];

  // Join everything first so the page is appended with a single, awaited write:
  // firing one un-awaited fs.writeFile per node lets the appends race each other
  // and lets the next page start writing before this one has finished.
  const text = nodes
    .filter((node) => node.type === "text")
    .map((node) => node.data + "\n")
    .join("");

  if (text !== "") {
    try {
      await fs.promises.writeFile("/home/xys/fuck.txt", text, { flag: "a+" });
    } catch (err) {
      // Keep crawling after a failed write, as before, but no longer hide the failure.
      console.error(err);
    }
  }

  const nextLinks = $(".pages .next");
  if (nextLinks.length === 0) {
    return "";
  }

  const nextHref = nextLinks[0].attribs.href;
  console.log(nextHref);
  return nextHref;
}
其实感觉用 Node 写爬虫才是最“原生”的,就跟写 JS 一样(本来就是 JS,哈哈哈哈)。
只用了一会儿就爬完了内容,不过 JS 的多线程还是不会用,晚上回家再看。