|
|
|
@ -52,6 +52,14 @@ module.exports = {
|
|
|
|
//start with this page, pull all the carname elements for their links
|
|
|
|
//start with this page, pull all the carname elements for their links
|
|
|
|
let links = await page.$$('.carname');
|
|
|
|
let links = await page.$$('.carname');
|
|
|
|
links = await Promise.all(links.map(async element => await page.evaluate(el => el.href, element)));
|
|
|
|
links = await Promise.all(links.map(async element => await page.evaluate(el => el.href, element)));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
links = await this.filterCompletedLinks(links);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
console.log(`Found ${links.length} unexplored links...`);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for(let i = 0;i < links.length;i++){
|
|
|
|
for(let i = 0;i < links.length;i++){
|
|
|
|
let link = links[i];
|
|
|
|
let link = links[i];
|
|
|
|
const newTab = await page.browser().newPage();
|
|
|
|
const newTab = await page.browser().newPage();
|
|
|
|
@ -75,5 +83,23 @@ module.exports = {
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return candidateLeads;
|
|
|
|
return candidateLeads;
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
filterCompletedLinks: function(links){
|
|
|
|
|
|
|
|
return new Promise((resolve, reject) => {
|
|
|
|
|
|
|
|
superagent.post('http://localhost:3000/lead/crawler/filterUrls')
|
|
|
|
|
|
|
|
.send({
|
|
|
|
|
|
|
|
urls: links
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
.set('authorization', `Basic ${process.env.crawlerToken}`)
|
|
|
|
|
|
|
|
.set('Accept', 'application/json')
|
|
|
|
|
|
|
|
.end((err, res) => {
|
|
|
|
|
|
|
|
if(err){
|
|
|
|
|
|
|
|
console.error('Failed to filter urls', err);
|
|
|
|
|
|
|
|
return reject(err);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
resolve(res.body?.urls);
|
|
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|