Added parsing for VINs

crawler
Edward Peterson 3 years ago
parent f52cd359ae
commit 9bc932a90b

@ -36,8 +36,10 @@ app.post('/convertGalleryToHar', async (req, res) => {
try {
const payloads = await run(url, processor);
res.status(200).json({
vin: payloads.vin,
mileage: payloads.mileage,
log: {
entries: payloads
entries: payloads.payloads
}
// payloads
})
@ -65,7 +67,12 @@ async function run(url, processor) {
await page.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
console.log('Loading page...');
await page.goto(url, { timeout: 60000 });
console.log('page loaded')
console.log('page loaded');
console.log('attempting to parse fields');
const vin = await processor.parseVIN(page);
console.log('parsed VIN:', vin);
const mileage = await processor.parseMileage(page);
console.log('parsed Mileage:', mileage);
const galleryUrls = await processor.execute(page);
@ -90,7 +97,11 @@ async function run(url, processor) {
})))
console.log('URLS done downloading')
await browser.close();
return payloads;
return {
vin,
payloads,
mileage
};
}
// run();

1575
package-lock.json generated

File diff suppressed because it is too large Load Diff

@ -1,9 +1,8 @@
//sample url = https://BarrettJacksonCDN.azureedge.net/staging/carlist/items/Fullsize/Cars/263634/263634_Rear_3-4_Web.jpg
//sample urlt= https://BarrettJacksonCDN.azureedge.net/staging/carlist/items/Thumbnails/Cars/263634/263634_Misc_6ca0ca58-2b21-482d-ba01-4078083b5520_T.jpg
const genericVinParserFactory = require("./generic-vin-parser");
const _ = require('lodash');
const path = require('path');
module.exports = {
baseUrl: 'www.barrett-jackson.com',
execute: async function (page) {
@ -21,6 +20,12 @@ module.exports = {
const sources = sourcesFromThumbnails;
return sources;
},
parseVIN: genericVinParserFactory({
vinElementSelector: `span#VIN`
}),
parseMileage: async function (page) {
return null;
}
}
function convertThumbnailUrlToFullSize(url) {

@ -1,5 +1,6 @@
const genericBootstrapFactory = require("./generic-bootstrap")
const genericBootstrapFactory = require("./generic-bootstrap");
const genericVinParserFactory = require("./generic-vin-parser");
const _ = require('lodash')
module.exports = {
execute: genericBootstrapFactory({
pageLoadIndicator: '.gallery',
@ -7,5 +8,11 @@ module.exports = {
carouselTrigger: '.gallery > a:nth-child(1)'
}),
baseUrl: 'bringatrailer.com'
baseUrl: 'bringatrailer.com',
parseVIN: genericVinParserFactory({
vinElementSelector: `div.item ul li`
}),
parseMileage: async function (page) {
return null;
}
}

@ -1,4 +1,7 @@
const genericBootstrapFactory = require("./generic-bootstrap")
const genericBootstrapFactory = require("./generic-bootstrap");
const genericVinParserFactory = require("./generic-vin-parser");
const _ = require('lodash');
module.exports = {
execute: genericBootstrapFactory({
@ -7,5 +10,12 @@ module.exports = {
vinSelector: 'li.p-vin > span:nth-child(2)',
carouselTrigger: 'div.swiper-slide-active > div > img.u-photo'
}),
baseUrl :'classiccars.com'
baseUrl: 'classiccars.com',
parseVIN: genericVinParserFactory({
vinElementSelector: `li.border-btm.p-vin span.w40.d-inline-block.b.fs-14.gray`
}),
parseMileage: async function (page) {
return null;
}
//li.border-btm.p-vin span.w40.d-inline-block.b.fs-14.gray
}

@ -1,3 +1,6 @@
const _ = require('lodash');
const genericVinParserFactory = require("./generic-vin-parser");
module.exports = {
baseUrl: 'davidsclassiccars.com',
execute: async function (page) {
@ -13,5 +16,12 @@ module.exports = {
}));
return sources;
},
parseVIN: genericVinParserFactory({
vinElementSelector: `table.car-features tbody tr td.options-full span`
}),
parseMileage: async function (page) {
return null;
}
}

@ -1,5 +1,5 @@
const _ = require('lodash');
const path = require('path');
const genericVinParserFactory = require("./generic-vin-parser");
module.exports = {
baseUrl: 'ebay.com',
execute: async function (page) {
@ -77,6 +77,12 @@ module.exports = {
return sources;
}
},
parseVIN: genericVinParserFactory({
vinElementSelector: `div.ux-layout-section__row div.ux-labels-values__values div.ux-labels-values__values-content div span.ux-textspans`
}),
parseMileage: async function (page) {
return null;
}
}
function convertThumbnailUrlToFullSize(url) {

@ -0,0 +1,12 @@
const _ = require('lodash');
module.exports = function(config) {
return async function (page) {
const vinSelector = config.vinElementSelector;
const vinRegex = /(?<vin>A\d\w\d{3}\w\d{6})/i;
const possibleVinElements = await page.$$(vinSelector);
const evaluatedVinElements = await Promise.all(possibleVinElements.map(async element => await page.evaluate(el => el.textContent, element)));
const correctElement = _.find(evaluatedVinElements, element => vinRegex.test(element));
return correctElement ? vinRegex.exec(correctElement).groups.vin : null;
}
}

@ -1,4 +1,6 @@
const genericBootstrapFactory = require("./generic-bootstrap")
const genericBootstrapFactory = require("./generic-bootstrap");
const genericVinParserFactory = require("./generic-vin-parser");
module.exports = {
execute: genericBootstrapFactory({
@ -7,6 +9,12 @@ module.exports = {
vinSelector: '.leading-loose > li:nth-child(2) > span:nth-child(2)',
carouselTrigger: '.bg-center'
}),
baseUrl :'hemmings.com'
baseUrl :'hemmings.com',
parseVIN: genericVinParserFactory({
vinElementSelector: `div.py-4.px-4.flex.flex-col.lg:flex-row.flex-wrap div.w-full.lg:w-1/2.my-2 div.flex.flex-col div.text-lg.lg:text-xl.whitespace-normal`
}),
parseMileage: async function (page) {
return null;
}
}

@ -1,3 +1,4 @@
const genericVinParserFactory = require("./generic-vin-parser");
module.exports = {
baseUrl: 'topclassiccarsforsale.com',
async execute(page) {
@ -11,5 +12,11 @@ module.exports = {
return { url: this.baseUrl + src };
}));
return sources;
},
parseVIN: genericVinParserFactory({
vinElementSelector: `ul.fullinfo li`
}),
parseMileage: async function (page) {
return null;
}
}
Loading…
Cancel
Save