Added 1st crawler
parent
797ab657d8
commit
2f5cc56b5f
@ -0,0 +1,65 @@
|
|||||||
|
const puppeteer = require('puppeteer');
|
||||||
|
const genericVinParserFactory = require('../processors/generics/generic-vin-parser');
|
||||||
|
module.exports = {
|
||||||
|
cronString: '46 * * * *',
|
||||||
|
run: async function () {
|
||||||
|
const startingPoint = 'https://topclassiccarsforsale.com/amc';
|
||||||
|
const browser = await puppeteer.launch({
|
||||||
|
headless: true
|
||||||
|
});
|
||||||
|
const page = await browser.newPage();
|
||||||
|
await page.setViewport({
|
||||||
|
width: 1200,
|
||||||
|
height: 800
|
||||||
|
});
|
||||||
|
await page.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
|
||||||
|
console.log('Loading page...');
|
||||||
|
await page.goto(startingPoint, { timeout: 60000 });
|
||||||
|
console.log('page loaded');
|
||||||
|
|
||||||
|
//get total page count
|
||||||
|
const visiblePageNumbers = await Promise.all((await page.$$('span.pgs > a')).map(async element => await page.evaluate(el => el.textContent, element)))
|
||||||
|
const highestPage = Math.max(...visiblePageNumbers.map(num => parseInt(num)));
|
||||||
|
for(let pageNumber = 1;pageNumber < highestPage;pageNumber++) {
|
||||||
|
console.log('loading page #', pageNumber)
|
||||||
|
await page.goto(`${startingPoint}/page/${pageNumber}/`, {timeout: 60000});
|
||||||
|
const cars = await module.exports.processPage(page);
|
||||||
|
console.log(cars);
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
|
processPage: async function(page) {
|
||||||
|
const candidateLeads = [];
|
||||||
|
const vinParser = genericVinParserFactory({
|
||||||
|
vinElementSelector: `ul.fullinfo li`,
|
||||||
|
vinRegex: /(?<vin>A\d\w397\w\d{6})/i
|
||||||
|
})
|
||||||
|
|
||||||
|
//start with this page, pull all the carname elements for their links
|
||||||
|
let links = await page.$$('.carname');
|
||||||
|
links = await Promise.all(links.map(async element => await page.evaluate(el => el.href, element)));
|
||||||
|
for(let i = 0;i < links.length;i++){
|
||||||
|
let link = links[i];
|
||||||
|
const newTab = await page.browser().newPage();
|
||||||
|
await newTab.setViewport({
|
||||||
|
width: 1200,
|
||||||
|
height: 800
|
||||||
|
});
|
||||||
|
await newTab.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
|
||||||
|
console.log('Loading page...');
|
||||||
|
await newTab.goto(link, { timeout: 60000 });
|
||||||
|
console.log('loaded new tab', link);
|
||||||
|
const possibleVin = await vinParser(newTab);
|
||||||
|
if(possibleVin){
|
||||||
|
//candidate found
|
||||||
|
candidateLeads.push({
|
||||||
|
vin: possibleVin,
|
||||||
|
url: link
|
||||||
|
});
|
||||||
|
}
|
||||||
|
await newTab.close();
|
||||||
|
|
||||||
|
}
|
||||||
|
return candidateLeads;
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue