JavaScript爬虫 初步尝试

2021-11-01

let axios = require("axios")
let cheerio = require("cheerio")
let fs = require("fs")


// Script entry point: start the crawl. run() is async, so attach a rejection
// handler; otherwise a failed request or parse error becomes an unhandled
// promise rejection with no context.
run().catch((err) => {
    console.error("crawl failed:", err);
    process.exitCode = 1;
});

/**
 * Smoke test for file output: appends a fixed dummy string to the output file.
 *
 * The write is asynchronous; a failure is reported on stderr instead of being
 * silently discarded.
 */
function test() {
    fs.writeFile("/home/xys/fuck.txt", "asdasdasdda", { flag: 'a+' }, (err) => {
        if (err) {
            console.error("test: failed to append to output file:", err);
        }
    });
}

/**
 * Crawl a chain of pages one after another.
 *
 * Starts from the seed URI and keeps calling getUrl with whatever URI the
 * previous call returned, stopping as soon as getUrl returns a falsy value.
 */
async function run() {
    const baseUrl = "https://XXX";
    let nextURI = "XXXX"; // seed URI of the first page
    do {
        // Each call handles one page and yields the URI of the following page.
        nextURI = await getUrl(baseUrl, nextURI);
    } while (nextURI);
}

/**
 * Fetch one page, append its text content to the output file, and return the
 * link to the next page.
 *
 * The page at `baseUrl + temURI` is downloaded and parsed. Every text node that
 * is a direct child of the first child element of `#contents` is appended to
 * the output file, one node per line, in document order.
 *
 * @param {string} baseUrl - Prefix prepended to `temURI` to form the request URL.
 * @param {string} temURI - URI of the page to fetch, appended to `baseUrl`.
 * @returns {Promise<string>} The `href` of the first `.pages .next` element,
 *     or "" when the page has no such element.
 * @throws Rejects if the HTTP request or the file write fails.
 */
async function getUrl(baseUrl, temURI) {
    const outputPath = "/home/xys/fuck.txt";
    const url = baseUrl + temURI;
    const resp = await axios.get(url);
    const $ = cheerio.load(resp.data);

    // Guard against pages without the expected container instead of throwing
    // a TypeError on `undefined.children`.
    const container = $("#contents").children()[0];
    const nodes = container && container.children ? container.children : [];
    if (!container) {
        console.warn(`getUrl: no #contents child found at ${url}`);
    }

    // Build the whole page's text first and write it with a single awaited
    // call: firing one un-awaited append per node does not guarantee the
    // order in which the writes land in the file.
    const text = nodes
        .filter((node) => node.type === "text")
        .map((node) => node.data + "\n")
        .join("");
    if (text !== "") {
        await fs.promises.writeFile(outputPath, text, { flag: 'a+' });
    }

    const next = $('.pages .next');
    if (next.length === 0) {
        return "";
    }
    const href = next[0].attribs.href;
    console.log(href);
    return href;
}

其实感觉用 Node 写爬虫才是最“原生”的，就跟写 JS 一样（其实就是 JS，哈哈哈哈）。

只用了一会儿就爬完了内容，不过 JS 的多线程还是不会用，晚上回家再看。