const puppeteer = require('puppeteer');

/**
 * Extracts the listings from the result page that is currently loaded.
 *
 * This function is handed to `page.evaluate`, so it must be self-contained:
 * it may only use its argument and globals that exist in the page (`document`).
 *
 * @param {number} max - Page limit chosen by the caller. When falsy, the limit
 *   is read from the second-to-last link of the `.house-lst-page-box` element.
 * @returns {{data: Array<{title: string, url: string, xiaoqu: string}>, max: number}}
 *   The listings found on the page and the page limit to use.
 */
function extractListings(max) {
  const data = [];
  for (const element of document.querySelectorAll('.sellListContent li')) {
    const titleLink = element.querySelector('.title a');
    const detailLink = element.querySelector('.noresultRecommend');
    const xiaoquLink = element.querySelector('.houseInfo a');
    // Skip list items that lack one of the expected nodes instead of letting a
    // null dereference abort the whole run.
    if (!titleLink || !detailLink || !xiaoquLink) {
      continue;
    }
    data.push({
      title: titleLink.innerHTML,
      // Link target (href) of the listing.
      url: detailLink.href,
      xiaoqu: xiaoquLink.innerHTML,
    });
  }

  let pageLimit = max;
  if (!pageLimit) {
    // Plain DOM lookup: does not require the page to expose jQuery's `$`.
    const lastPageLink = document.querySelector('.house-lst-page-box a:nth-last-child(2)');
    const parsed = lastPageLink ? Number.parseInt(lastPageLink.innerHTML, 10) : NaN;
    // Fall back to a single page when the pagination box cannot be read.
    pageLimit = Number.isNaN(parsed) ? 1 : parsed;
  }

  return { data, max: pageLimit };
}

/**
 * Aggregates listings into per-community (xiaoqu) counts.
 *
 * @param {Array<{xiaoqu: string}>} all_result - Scraped listings.
 * @returns {Array<{name: string, count: number}>} One entry per distinct
 *   community name (with spaces removed) and the number of listings in it.
 */
function dealData(all_result) {
  const counts = new Map();
  // Iterate from the last listing to the first; this fixes the Map's insertion
  // order and therefore the order of the returned array.
  for (let i = all_result.length - 1; i >= 0; i--) {
    // Remove every space, not only the first one, so that differently padded
    // spellings of the same name are counted together.
    const name = all_result[i].xiaoqu.replace(/ /g, '');
    counts.set(name, (counts.get(name) || 0) + 1);
  }

  const xiaoqu_count_array = [];
  for (const [name, count] of counts) {
    xiaoqu_count_array.push({ name, count });
  }
  return xiaoqu_count_array;
}

/**
 * Builds an ascending comparator for `Array.prototype.sort` that orders
 * objects by the value of one property.
 *
 * @param {string} propertyName - Property to compare on.
 * @returns {function(Object, Object): number} Comparator returning 1, -1 or 0.
 */
function compare(propertyName) {
  return function (object1, object2) {
    const value1 = object1[propertyName];
    const value2 = object2[propertyName];
    if (value1 > value2) {
      return 1;
    }
    if (value1 < value2) {
      return -1;
    }
    return 0;
  };
}

(async () => {
  const browser = await puppeteer.launch({
    // Timeout in milliseconds.
    timeout: 15000,
    // Ignore HTTPS errors when visiting https pages.
    ignoreHTTPSErrors: true,
    // Open DevTools; when this is true, headless is always false.
    devtools: false,
    // true does not open a browser window; false opens one.
    headless: true,
  });

  try {
    const pageUrl = (pageNumber) => `https://xm.lianjia.com/ershoufang/pg${pageNumber}/`;
    const page = await browser.newPage();
    await page.goto(pageUrl(1));

    /* Maximum number of pages to crawl. */
    let max = 10;
    /* Listings collected from every page. */
    const all_result = [];

    console.log('开始');
    console.time('计时');

    /* First page: also settles the page limit. */
    const firstPage = await page.evaluate(extractListings, max);
    max = firstPage.max;
    console.log(`最大页数:${max}`);
    console.log(firstPage.data);
    console.log('当前页:1');
    all_result.push(...firstPage.data);

    /* Remaining pages. */
    for (let i = 2; i <= max; i++) {
      await page.goto(pageUrl(i));
      const result = await page.evaluate(extractListings, max);
      console.log(result.data);
      console.log(`当前页:${i}`);
      all_result.push(...result.data);
    }
    console.timeEnd('计时');

    console.time('处理数据时间');
    console.log('处理数据开始');
    const xiaoqu_count_array = dealData(all_result);
    /* Sort ascending by count, then print the last entry (the largest count). */
    xiaoqu_count_array.sort(compare('count'));
    console.log('xiaoqu_count_array_new');
    console.log(xiaoqu_count_array[xiaoqu_count_array.length - 1]);
    console.log('处理数据结束');
    console.timeEnd('处理数据时间');
  } finally {
    // Always close the browser so the launched browser process is not left
    // running, even when scraping fails part-way through.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure explicitly instead of leaving an unhandled rejection.
  console.error(error);
  process.exitCode = 1;
});

/*
 * Notes:
 * - page.setViewport: sets the viewport.
 * - The script mainly relies on a few methods of `page`.
 * - The scraped data is then lightly processed to find the community with the
 *   most listings.
 */