You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ListingExtractor/crawlers/topclassiccarsforsale.crawl...

76 lines
3.1 KiB
JavaScript

const puppeteer = require('puppeteer');
const genericVinParserFactory = require('../processors/generics/generic-vin-parser');
const superagent = require('superagent');
module.exports = {
cronString: '0 22 * * *',
run: async function () {
const startingPoint = 'https://topclassiccarsforsale.com/amc';
const browser = await puppeteer.launch({
headless: true
});
const page = await browser.newPage();
await page.setViewport({
width: 1200,
height: 800
});
await page.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
console.log('Loading page...');
await page.goto(startingPoint, { timeout: 60000 });
console.log('page loaded');
//get total page count
const visiblePageNumbers = await Promise.all((await page.$$('span.pgs > a')).map(async element => await page.evaluate(el => el.textContent, element)))
const highestPage = Math.max(...visiblePageNumbers.map(num => parseInt(num)));
for(let pageNumber = 1;pageNumber < highestPage;pageNumber++) {
console.log('loading page #', pageNumber)
await page.goto(`${startingPoint}/page/${pageNumber}/`, {timeout: 60000});
const cars = await module.exports.processPage(page);
console.log(cars);
cars.forEach(car => {
superagent.post('http://localhost:3000/lead/createFromCrawler')
.send({
url: car.url
}).end((err, res) => {
if(err){
console.error('Failed to send lead', err);
}
});
})
}
},
processPage: async function(page) {
const candidateLeads = [];
const vinParser = genericVinParserFactory({
vinElementSelector: `ul.fullinfo li`,
vinRegex: /(?<vin>A\d\w397\w\d{6})/i
})
//start with this page, pull all the carname elements for their links
let links = await page.$$('.carname');
links = await Promise.all(links.map(async element => await page.evaluate(el => el.href, element)));
for(let i = 0;i < links.length;i++){
let link = links[i];
const newTab = await page.browser().newPage();
await newTab.setViewport({
width: 1200,
height: 800
});
await newTab.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
console.log('Loading page...');
await newTab.goto(link, { timeout: 60000 });
console.log('loaded new tab', link);
const possibleVin = await vinParser(newTab);
if(possibleVin){
//candidate found
candidateLeads.push({
vin: possibleVin,
url: link
});
}
await newTab.close();
}
return candidateLeads;
}
}