Added API endpoints to trigger process

crawler
Edward Peterson 3 years ago
parent 5e981cedca
commit be1ab59bac

@ -1,158 +1,85 @@
const puppeteer = require('puppeteer')
const fs = require('fs');
const superagent = require('superagent');
const path = require('path');
const uuid = require('uuid');
const express = require('express');
const glob = require('glob');
const _ = require('lodash');
const app = express();
var bodyParser = require('body-parser')
var url = '';
process.argv.forEach((val, index, array) => {
console.log(val)
if(val.startsWith('url=')){
url = val.substring(4);
}
const processors = [];
//load the processor files
glob('processors/*.processor.js', (error, matches) => {
console.log(matches);
_.forEach(matches, file => {
const processor = require(path.resolve('./', file));
processors.push(processor);
})
console.log(`${matches.length} processors loaded`);
})
if(!url || url.length === 0) {
console.log('Need to supply url');
process.exit();
}
const processors = [{
baseUrl: 'bringatrailer.com',
pageLoadIndicator: '.gallery',
vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a',
carouselTrigger: '.gallery > a:nth-child(1)'
app.use(bodyParser.json())
app.post('/convertGalleryToHar', async (req, res) => {
const url = req.body.url;
console.log(url);
// get processor
const processor = _.find(processors, (processor) => url.includes(processor.baseUrl));
if (!processor) {
return res.status(400).json({
message: 'Could not find processor for url'
})
}
console.log('Processor found');
try {
const payloads = await run(url, processor);
console.log(payloads);
}, {
baseUrl: 'classiccars.com',
pageLoadIndicator: '#ListingCarousel',
vinSelector: 'li.p-vin > span:nth-child(2)',
carouselTrigger: 'div.swiper-slide-active > div > img.u-photo'
}]
var processorConfig = {}
processors.forEach(config => {
if(url.includes(config.baseUrl)){
processorConfig = config;
res.status(200).json({
log: {
entries: payloads
}
// payloads
})
} catch (error) {
console.log(error);
res.status(500).json(error);
}
})
if(!processorConfig) {
console.log('Unsupported URL');
process.exit();
}
app.listen(2667);
const run = async () => {
const browser = await puppeteer.launch({
headless:true
});
const page = await browser.newPage();
await page.goto(url);
// Type into search box.
// await page.type('.devsite-search-field', 'Headless Chrome');
// Wait for suggest overlay to appear and click "show all results".
const allResultsSelector = processorConfig.pageLoadIndicator;
await page.waitForSelector(allResultsSelector);
var vin;
async function run(url, processor) {
const browser = await puppeteer.launch({
headless: true
});
const vinSelector = processorConfig.vinSelector;
await page.waitForSelector(vinSelector).then(async () => {
let element = await page.$(vinSelector)
vin = await page.evaluate(el => el.textContent, element);
}).catch(error => {
console.error('Unable to grab VIN, falling back to UUID');
vin = uuid.v4();
});
console.log(vin);
// await page.click(allResultsSelector);
const page = await browser.newPage();
console.log('Loading page...');
await page.goto(url, { timeout: 60000 });
console.log('page loaded')
const galleryUrls = await processor.execute(page);
const client = await page.target().createCDPSession()
await client.send('Page.setDownloadBehavior', {
behavior: 'allow',
downloadPath: './images',
})
const firstImageLinkSelector = processorConfig.carouselTrigger;
await page.click(firstImageLinkSelector);
await page.waitForSelector('.pswp__img')
console.log('Gallery is loaded, fetching URLS')
const galleryUrls = await page.evaluate(downloadGallery, vin);
console.log('Done collecting URLS', galleryUrls.length);
await Promise.all(galleryUrls.map(image => new Promise(async (resolve, reject) => {
const fileType = path.extname(image.url);
var stream = fs.createWriteStream(path.resolve('images', `${image.fileName}${fileType}`));
stream.on('finish', resolve);
superagent.get(image.url).pipe(stream);
})))
console.log('URLS done downloading')
await browser.close();
};
downloadGallery = async (vin) => {
var galleryUrls = [];
async function ensureCarouselVisible() {
var imgWrap;
do {
imgWrap = document.elementFromPoint(100, 100);
console.log(imgWrap.classList);
await delay(50);
} while(imgWrap.classList.contains('pswp__img--placeholder'));
}
function delay(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
// Find the active image, surround it in an anchor tag, then click it.
async function downloadImage(id) {
const imgWrap = document.elementFromPoint(300, 300);
const children = imgWrap.querySelectorAll('.pswp__img');
const img = children[children.length - 1];
console.log(img, imgWrap);
// debugger;
// Full image hasn't loaded yet
const src = imgWrap.src.split('?')[0]// get rid of querystring
return downloadSrc(src, id);
}
function downloadSrc(src, id) {
galleryUrls.push({
fileName: id,
url:src
})
}
function nextImage() {
document.querySelector('.pswp__button.pswp__button--arrow--right').click();
console.log('Done collecting URLS', galleryUrls.length);
const payloads = await Promise.all(galleryUrls.map(image => new Promise(async (resolve, reject) => {
superagent.get(image.url).responseType('blob').then(function (response) {
if (response.statusCode == 200) {
return resolve({
response: {
content: {
mimeType: response.headers["content-type"],
encoding: 'base64',
text: response.body.toString('base64')
}
}
});
}
function getCounterValue() {
const [position, total] = document.querySelector('.pswp__counter').textContent.split('/');
return parseInt(position.trim(), 10);
}
function run() {
return new Promise(async (resolve, reject) => {
await ensureCarouselVisible();
await delay(500);
const firstValue = getCounterValue();
do {
await downloadImage(`${vin}-${getCounterValue()}`);
nextImage();
await delay(100);
} while(getCounterValue() !== firstValue);
return resolve(galleryUrls)
})
}
return new Promise((resolve, reject) => {
run().then(resolve);
});
}
reject();
});
})))
console.log('URLS done downloading')
await browser.close();
return payloads;
}
run();
// run();

448
package-lock.json generated

@ -47,6 +47,15 @@
"@types/node": "*"
}
},
"accepts": {
"version": "1.3.8",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
"integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==",
"requires": {
"mime-types": "~2.1.34",
"negotiator": "0.6.3"
}
},
"agent-base": {
"version": "6.0.2",
"resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz",
@ -63,6 +72,11 @@
"color-convert": "^1.9.0"
}
},
"array-flatten": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
"integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg=="
},
"asap": {
"version": "2.0.6",
"resolved": "https://registry.npmjs.org/asap/-/asap-2.0.6.tgz",
@ -93,13 +107,46 @@
"readable-stream": "^3.4.0"
}
},
"body-parser": {
"version": "1.20.1",
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
"integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==",
"requires": {
"bytes": "3.1.2",
"content-type": "~1.0.4",
"debug": "2.6.9",
"depd": "2.0.0",
"destroy": "1.2.0",
"http-errors": "2.0.0",
"iconv-lite": "0.4.24",
"on-finished": "2.4.1",
"qs": "6.11.0",
"raw-body": "2.5.1",
"type-is": "~1.6.18",
"unpipe": "1.0.0"
},
"dependencies": {
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
}
}
},
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
"integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
"balanced-match": "^1.0.0"
}
},
"buffer": {
@ -116,6 +163,11 @@
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
"integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ=="
},
"bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
"integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="
},
"call-bind": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz",
@ -176,6 +228,29 @@
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg=="
},
"content-disposition": {
"version": "0.5.4",
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
"integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
"requires": {
"safe-buffer": "5.2.1"
}
},
"content-type": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz",
"integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA=="
},
"cookie": {
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz",
"integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw=="
},
"cookie-signature": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
"integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ=="
},
"cookiejar": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/cookiejar/-/cookiejar-2.1.3.tgz",
@ -214,6 +289,16 @@
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="
},
"depd": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
"integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw=="
},
"destroy": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz",
"integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg=="
},
"devtools-protocol": {
"version": "0.0.1056733",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1056733.tgz",
@ -228,6 +313,16 @@
"wrappy": "1"
}
},
"ee-first": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="
},
"encodeurl": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
"integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w=="
},
"end-of-stream": {
"version": "1.4.4",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
@ -244,11 +339,74 @@
"is-arrayish": "^0.2.1"
}
},
"escape-html": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
"integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg=="
},
"etag": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
"integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="
},
"express": {
"version": "4.18.2",
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
"integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==",
"requires": {
"accepts": "~1.3.8",
"array-flatten": "1.1.1",
"body-parser": "1.20.1",
"content-disposition": "0.5.4",
"content-type": "~1.0.4",
"cookie": "0.5.0",
"cookie-signature": "1.0.6",
"debug": "2.6.9",
"depd": "2.0.0",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"finalhandler": "1.2.0",
"fresh": "0.5.2",
"http-errors": "2.0.0",
"merge-descriptors": "1.0.1",
"methods": "~1.1.2",
"on-finished": "2.4.1",
"parseurl": "~1.3.3",
"path-to-regexp": "0.1.7",
"proxy-addr": "~2.0.7",
"qs": "6.11.0",
"range-parser": "~1.2.1",
"safe-buffer": "5.2.1",
"send": "0.18.0",
"serve-static": "1.15.0",
"setprototypeof": "1.2.0",
"statuses": "2.0.1",
"type-is": "~1.6.18",
"utils-merge": "1.0.1",
"vary": "~1.1.2"
},
"dependencies": {
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
}
}
},
"extract-zip": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
@ -273,6 +431,35 @@
"pend": "~1.2.0"
}
},
"finalhandler": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
"integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==",
"requires": {
"debug": "2.6.9",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"on-finished": "2.4.1",
"parseurl": "~1.3.3",
"statuses": "2.0.1",
"unpipe": "~1.0.0"
},
"dependencies": {
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
}
},
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
}
}
},
"form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
@ -301,6 +488,16 @@
}
}
},
"forwarded": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
"integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="
},
"fresh": {
"version": "0.5.2",
"resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
"integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="
},
"fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
@ -335,16 +532,15 @@
}
},
"glob": {
"version": "7.2.3",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
"integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
"version": "8.0.3",
"resolved": "https://registry.npmjs.org/glob/-/glob-8.0.3.tgz",
"integrity": "sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ==",
"requires": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.1.1",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
"minimatch": "^5.0.1",
"once": "^1.3.0"
}
},
"has": {
@ -370,6 +566,18 @@
"resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",
"integrity": "sha512-QFLV0taWQOZtvIRIAdBChesmogZrtuXvVWsFHZTk2SU+anspqZ2vMnoLg7IE1+Uk16N19APic1BuF8bC8c2m5g=="
},
"http-errors": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz",
"integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==",
"requires": {
"depd": "2.0.0",
"inherits": "2.0.4",
"setprototypeof": "1.2.0",
"statuses": "2.0.1",
"toidentifier": "1.0.1"
}
},
"https-proxy-agent": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz",
@ -379,6 +587,14 @@
"debug": "4"
}
},
"iconv-lite": {
"version": "0.4.24",
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
"integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
"requires": {
"safer-buffer": ">= 2.1.2 < 3"
}
},
"ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
@ -407,6 +623,11 @@
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
},
"ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
"integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g=="
},
"is-arrayish": {
"version": "0.2.1",
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz",
@ -427,6 +648,11 @@
"resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz",
"integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg=="
},
"lodash": {
"version": "4.17.21",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz",
"integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg=="
},
"lru-cache": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
@ -435,6 +661,16 @@
"yallist": "^4.0.0"
}
},
"media-typer": {
"version": "0.3.0",
"resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
"integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ=="
},
"merge-descriptors": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
"integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w=="
},
"methods": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
@ -459,11 +695,11 @@
}
},
"minimatch": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz",
"integrity": "sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg==",
"requires": {
"brace-expansion": "^1.1.7"
"brace-expansion": "^2.0.1"
}
},
"mkdirp-classic": {
@ -476,6 +712,11 @@
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
"integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w=="
},
"negotiator": {
"version": "0.6.3",
"resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
"integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg=="
},
"node-fetch": {
"version": "2.6.7",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz",
@ -489,6 +730,14 @@
"resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.2.tgz",
"integrity": "sha512-z+cPxW0QGUp0mcqcsgQyLVRDoXFQbXOwBaqyF7VIgI4TWNQsDHrBpUQslRmIfAoYWdYzs6UlKJtB2XJpTaNSpQ=="
},
"on-finished": {
"version": "2.4.1",
"resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
"integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
"requires": {
"ee-first": "1.1.1"
}
},
"once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
@ -516,11 +765,21 @@
"lines-and-columns": "^1.1.6"
}
},
"parseurl": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
"integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="
},
"path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg=="
},
"path-to-regexp": {
"version": "0.1.7",
"resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
"integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ=="
},
"path-type": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
@ -536,6 +795,15 @@
"resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz",
"integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA=="
},
"proxy-addr": {
"version": "2.0.7",
"resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
"integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
"requires": {
"forwarded": "0.2.0",
"ipaddr.js": "1.9.1"
}
},
"proxy-from-env": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
@ -588,6 +856,22 @@
"side-channel": "^1.0.4"
}
},
"range-parser": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
"integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg=="
},
"raw-body": {
"version": "2.5.1",
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz",
"integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==",
"requires": {
"bytes": "3.1.2",
"http-errors": "2.0.0",
"iconv-lite": "0.4.24",
"unpipe": "1.0.0"
}
},
"readable-stream": {
"version": "3.6.0",
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
@ -609,6 +893,38 @@
"integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==",
"requires": {
"glob": "^7.1.3"
},
"dependencies": {
"brace-expansion": {
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
}
},
"glob": {
"version": "7.2.3",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
"integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
"requires": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.1.1",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
}
},
"minimatch": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"requires": {
"brace-expansion": "^1.1.7"
}
}
}
},
"safe-buffer": {
@ -616,6 +932,11 @@
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
"integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
},
"safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
"integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
},
"semver": {
"version": "7.3.8",
"resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz",
@ -624,6 +945,69 @@
"lru-cache": "^6.0.0"
}
},
"send": {
"version": "0.18.0",
"resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
"integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==",
"requires": {
"debug": "2.6.9",
"depd": "2.0.0",
"destroy": "1.2.0",
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"etag": "~1.8.1",
"fresh": "0.5.2",
"http-errors": "2.0.0",
"mime": "1.6.0",
"ms": "2.1.3",
"on-finished": "2.4.1",
"range-parser": "~1.2.1",
"statuses": "2.0.1"
},
"dependencies": {
"debug": {
"version": "2.6.9",
"resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
"integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
"requires": {
"ms": "2.0.0"
},
"dependencies": {
"ms": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
}
}
},
"mime": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
"integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg=="
},
"ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
}
}
},
"serve-static": {
"version": "1.15.0",
"resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz",
"integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==",
"requires": {
"encodeurl": "~1.0.2",
"escape-html": "~1.0.3",
"parseurl": "~1.3.3",
"send": "0.18.0"
}
},
"setprototypeof": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
"integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
},
"side-channel": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz",
@ -634,6 +1018,11 @@
"object-inspect": "^1.9.0"
}
},
"statuses": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
"integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ=="
},
"string_decoder": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
@ -695,11 +1084,25 @@
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
"integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg=="
},
"toidentifier": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
"integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA=="
},
"tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"type-is": {
"version": "1.6.18",
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
"integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
"requires": {
"media-typer": "0.3.0",
"mime-types": "~2.1.24"
}
},
"unbzip2-stream": {
"version": "1.4.3",
"resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz",
@ -709,16 +1112,31 @@
"through": "^2.3.8"
}
},
"unpipe": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
"integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ=="
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
"integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
},
"utils-merge": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
"integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA=="
},
"uuid": {
"version": "9.0.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
"integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg=="
},
"vary": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
"integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg=="
},
"webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",

@ -9,6 +9,10 @@
"author": "Edward Peterson",
"license": "ISC",
"dependencies": {
"body-parser": "^1.20.1",
"express": "^4.18.2",
"glob": "^8.0.3",
"lodash": "^4.17.21",
"puppeteer": "^19.2.0",
"superagent": "^8.0.3",
"uuid": "^9.0.0"

@ -0,0 +1,11 @@
const genericBootstrapFactory = require("./generic-bootstrap")
module.exports = {
execute: genericBootstrapFactory({
pageLoadIndicator: '.gallery',
vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a',
carouselTrigger: '.gallery > a:nth-child(1)'
}),
baseUrl: 'bringatrailer.com'
}

@ -0,0 +1,11 @@
const genericBootstrapFactory = require("./generic-bootstrap")
module.exports = {
execute: genericBootstrapFactory({
baseUrl: 'classiccars.com',
pageLoadIndicator: '#ListingCarousel',
vinSelector: 'li.p-vin > span:nth-child(2)',
carouselTrigger: 'div.swiper-slide-active > div > img.u-photo'
}),
baseUrl :'classiccars.com'
}

@ -0,0 +1,105 @@
const uuid = require('uuid');
module.exports = function (processorConfig) {
downloadGallery = async (vin) => {
var galleryUrls = [];
async function ensureCarouselVisible() {
var imgWrap;
do {
imgWrap = document.elementFromPoint(100, 100);
console.log(imgWrap.classList);
await delay(50);
} while (imgWrap.classList.contains('pswp__img--placeholder'));
}
function delay(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
// Find the active image, surround it in an anchor tag, then click it.
async function downloadImage(id) {
const imgWrap = document.elementFromPoint(300, 300);
const children = imgWrap.querySelectorAll('.pswp__img');
const img = children[children.length - 1];
console.log(img, imgWrap);
// debugger;
// Full image hasn't loaded yet
const src = imgWrap.src.split('?')[0]// get rid of querystring
return downloadSrc(src, id);
}
function downloadSrc(src, id) {
galleryUrls.push({
fileName: id,
url: src
})
}
function nextImage() {
document.querySelector('.pswp__button.pswp__button--arrow--right').click();
}
function getCounterValue() {
const [position, total] = document.querySelector('.pswp__counter').textContent.split('/');
return parseInt(position.trim(), 10);
}
function run() {
return new Promise(async (resolve, reject) => {
await ensureCarouselVisible();
await delay(500);
const firstValue = getCounterValue();
do {
await downloadImage(`${vin}-${getCounterValue()}`);
nextImage();
await delay(100);
} while (getCounterValue() !== firstValue);
return resolve(galleryUrls)
})
}
return new Promise((resolve, reject) => {
run().then(resolve);
});
}
return async function (page) {
console.log('Running generic boostrap extractor')
// Type into search box.
// await page.type('.devsite-search-field', 'Headless Chrome');
// Wait for suggest overlay to appear and click "show all results".
const allResultsSelector = processorConfig.pageLoadIndicator;
await page.waitForSelector(allResultsSelector);
var vin;
const vinSelector = processorConfig.vinSelector;
await page.waitForSelector(vinSelector).then(async () => {
let element = await page.$(vinSelector)
vin = await page.evaluate(el => el.textContent, element);
}).catch(error => {
console.error('Unable to grab VIN, falling back to UUID');
vin = uuid.v4();
});
console.log(vin);
// await page.click(allResultsSelector);
const client = await page.target().createCDPSession()
await client.send('Page.setDownloadBehavior', {
behavior: 'allow',
downloadPath: './images',
})
const firstImageLinkSelector = processorConfig.carouselTrigger;
await page.click(firstImageLinkSelector);
await page.waitForSelector('.pswp__img')
console.log('Gallery is loaded, fetching URLS')
const galleryUrls = await page.evaluate(downloadGallery, vin);
return galleryUrls
}
}
Loading…
Cancel
Save