diff --git a/index.js b/index.js index cf42238..82f7bbc 100644 --- a/index.js +++ b/index.js @@ -1,158 +1,85 @@ const puppeteer = require('puppeteer') -const fs = require('fs'); const superagent = require('superagent'); const path = require('path'); -const uuid = require('uuid'); +const express = require('express'); +const glob = require('glob'); +const _ = require('lodash'); +const app = express(); +var bodyParser = require('body-parser') -var url = ''; -process.argv.forEach((val, index, array) => { - console.log(val) - if(val.startsWith('url=')){ - url = val.substring(4); - } +const processors = []; +//load the processor files +glob('processors/*.processor.js', (error, matches) => { + console.log(matches); + _.forEach(matches, file => { + const processor = require(path.resolve('./', file)); + processors.push(processor); + }) + console.log(`${matches.length} processors loaded`); }) -if(!url || url.length === 0) { - console.log('Need to supply url'); - process.exit(); -} - - -const processors = [{ - baseUrl: 'bringatrailer.com', - pageLoadIndicator: '.gallery', - vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a', - carouselTrigger: '.gallery > a:nth-child(1)' +app.use(bodyParser.json()) +app.post('/convertGalleryToHar', async (req, res) => { + const url = req.body.url; + console.log(url); + // get processor + const processor = _.find(processors, (processor) => url.includes(processor.baseUrl)); + if (!processor) { + return res.status(400).json({ + message: 'Could not find processor for url' + }) + } + console.log('Processor found'); + try { + const payloads = await run(url, processor); + console.log(payloads); -}, { - baseUrl: 'classiccars.com', - pageLoadIndicator: '#ListingCarousel', - vinSelector: 'li.p-vin > span:nth-child(2)', - carouselTrigger: 'div.swiper-slide-active > div > img.u-photo' -}] -var processorConfig = {} -processors.forEach(config => { - if(url.includes(config.baseUrl)){ - processorConfig = config; + res.status(200).json({ + log: { + entries: payloads + } + // payloads + }) + } catch (error) { + console.log(error); + res.status(500).json(error); } + }) -if(!processorConfig) { - console.log('Unsupported URL'); - process.exit(); -} +app.listen(2667); -const run = async () => { - const browser = await puppeteer.launch({ - headless:true - }); - - const page = await browser.newPage(); - - await page.goto(url); - - // Type into search box. - // await page.type('.devsite-search-field', 'Headless Chrome'); - - // Wait for suggest overlay to appear and click "show all results". - const allResultsSelector = processorConfig.pageLoadIndicator; - await page.waitForSelector(allResultsSelector); - var vin; +async function run(url, processor) { + const browser = await puppeteer.launch({ + headless: true + }); - const vinSelector = processorConfig.vinSelector; - await page.waitForSelector(vinSelector).then(async () => { - let element = await page.$(vinSelector) - vin = await page.evaluate(el => el.textContent, element); - }).catch(error => { - console.error('Unable to grab VIN, falling back to UUID'); - vin = uuid.v4(); - }); - console.log(vin); - // await page.click(allResultsSelector); + const page = await browser.newPage(); + console.log('Loading page...'); + await page.goto(url, { timeout: 60000 }); + console.log('page loaded') + const galleryUrls = await processor.execute(page); - const client = await page.target().createCDPSession() - await client.send('Page.setDownloadBehavior', { - behavior: 'allow', - downloadPath: './images', - }) - const firstImageLinkSelector = processorConfig.carouselTrigger; - await page.click(firstImageLinkSelector); - await page.waitForSelector('.pswp__img') - console.log('Gallery is loaded, fetching URLS') - const galleryUrls = await page.evaluate(downloadGallery, vin); - console.log('Done collecting URLS', galleryUrls.length); - await Promise.all(galleryUrls.map(image => new Promise(async (resolve, reject) => { - const fileType = path.extname(image.url); - var stream = fs.createWriteStream(path.resolve('images', `${image.fileName}${fileType}`)); - stream.on('finish', resolve); - superagent.get(image.url).pipe(stream); - }))) - console.log('URLS done downloading') - - await browser.close(); - }; - downloadGallery = async (vin) => { - var galleryUrls = []; - async function ensureCarouselVisible() { - var imgWrap; - do { - imgWrap = document.elementFromPoint(100, 100); - console.log(imgWrap.classList); - await delay(50); - } while(imgWrap.classList.contains('pswp__img--placeholder')); - - } - - function delay(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); - } - - // Find the active image, surround it in an anchor tag, then click it. - async function downloadImage(id) { - const imgWrap = document.elementFromPoint(300, 300); - const children = imgWrap.querySelectorAll('.pswp__img'); - const img = children[children.length - 1]; - console.log(img, imgWrap); - // debugger; - // Full image hasn't loaded yet - const src = imgWrap.src.split('?')[0]// get rid of querystring - return downloadSrc(src, id); - } - - function downloadSrc(src, id) { - galleryUrls.push({ - fileName: id, - url:src - }) - } - - function nextImage() { - document.querySelector('.pswp__button.pswp__button--arrow--right').click(); + console.log('Done collecting URLS', galleryUrls.length); + const payloads = await Promise.all(galleryUrls.map(image => new Promise(async (resolve, reject) => { + superagent.get(image.url).responseType('blob').then(function (response) { + if (response.statusCode == 200) { + return resolve({ + response: { + content: { + mimeType: response.headers["content-type"], + encoding: 'base64', + text: response.body.toString('base64') + } + } + }); } - - function getCounterValue() { - const [position, total] = document.querySelector('.pswp__counter').textContent.split('/'); - return parseInt(position.trim(), 10); - } - - function run() { - return new Promise(async (resolve, reject) => { - await ensureCarouselVisible(); - await delay(500); - const firstValue = getCounterValue(); - do { - await downloadImage(`${vin}-${getCounterValue()}`); - nextImage(); - await delay(100); - } while(getCounterValue() !== firstValue); - return resolve(galleryUrls) - }) - - } - - return new Promise((resolve, reject) => { - run().then(resolve); - }); - } + reject(); + }); + }))) + console.log('URLS done downloading') + await browser.close(); + return payloads; +} - run(); \ No newline at end of file +// run(); \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 5530d02..5d1b78c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -47,6 +47,15 @@ "@types/node": "*" } }, + "accepts": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", + "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==", + "requires": { + "mime-types": "~2.1.34", + "negotiator": "0.6.3" + } + }, "agent-base": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", @@ -63,6 +72,11 @@ "color-convert": "^1.9.0" } }, + "array-flatten": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", + "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==" + }, "asap": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/asap/-/asap-2.0.6.tgz", @@ -93,13 +107,46 @@ "readable-stream": "^3.4.0" } }, + "body-parser": { + "version": "1.20.1", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz", + "integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==", + "requires": { + "bytes": "3.1.2", + "content-type": "~1.0.4", + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "on-finished": "2.4.1", + "qs": "6.11.0", + "raw-body": "2.5.1", + "type-is": "~1.6.18", + "unpipe": "1.0.0" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" + } + } + }, "brace-expansion": { - "version": "1.1.11", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", - "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", "requires": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "balanced-match": "^1.0.0" } }, "buffer": { @@ -116,6 +163,11 @@ "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==" }, + "bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==" + }, "call-bind": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz", @@ -176,6 +228,29 @@ "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" }, + "content-disposition": { + "version": "0.5.4", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", + "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==", + "requires": { + "safe-buffer": "5.2.1" + } + }, + "content-type": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.4.tgz", + "integrity": "sha512-hIP3EEPs8tB9AT1L+NUqtwOAps4mk2Zob89MWXMHjHWg9milF/j4osnnQLXBCBFBk/tvIG/tUc9mOUJiPBhPXA==" + }, + "cookie": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", + "integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==" + }, + "cookie-signature": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz", + "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==" + }, "cookiejar": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/cookiejar/-/cookiejar-2.1.3.tgz", @@ -214,6 +289,16 @@ "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" }, + "depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==" + }, + "destroy": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz", + "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==" + }, "devtools-protocol": { "version": "0.0.1056733", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1056733.tgz", @@ -228,6 +313,16 @@ "wrappy": "1" } }, + "ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" + }, + "encodeurl": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz", + "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==" + }, "end-of-stream": { "version": "1.4.4", "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", @@ -244,11 +339,74 @@ "is-arrayish": "^0.2.1" } }, + "escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" + }, "escape-string-regexp": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", "integrity": "sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==" }, + "etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==" + }, + "express": { + "version": "4.18.2", + "resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz", + "integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==", + "requires": { + "accepts": "~1.3.8", + "array-flatten": "1.1.1", + "body-parser": "1.20.1", + "content-disposition": "0.5.4", + "content-type": "~1.0.4", + "cookie": "0.5.0", + "cookie-signature": "1.0.6", + "debug": "2.6.9", + "depd": "2.0.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "finalhandler": "1.2.0", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "merge-descriptors": "1.0.1", + "methods": "~1.1.2", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "path-to-regexp": "0.1.7", + "proxy-addr": "~2.0.7", + "qs": "6.11.0", + "range-parser": "~1.2.1", + "safe-buffer": "5.2.1", + "send": "0.18.0", + "serve-static": "1.15.0", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "type-is": "~1.6.18", + "utils-merge": "1.0.1", + "vary": "~1.1.2" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" + } + } + }, "extract-zip": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", @@ -273,6 +431,35 @@ "pend": "~1.2.0" } }, + "finalhandler": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz", + "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==", + "requires": { + "debug": "2.6.9", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "on-finished": "2.4.1", + "parseurl": "~1.3.3", + "statuses": "2.0.1", + "unpipe": "~1.0.0" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + } + }, + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" + } + } + }, "form-data": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", @@ -301,6 +488,16 @@ } } }, + "forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==" + }, + "fresh": { + "version": "0.5.2", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz", + "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==" + }, "fs-constants": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", @@ -335,16 +532,15 @@ } }, "glob": { - "version": "7.2.3", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", - "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "version": "8.0.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-8.0.3.tgz", + "integrity": "sha512-ull455NHSHI/Y1FqGaaYFaLGkNMMJbavMrEGFXG/PGrg6y7sutWHUHrz6gy6WEBH6akM1M414dWKCNs+IhKdiQ==", "requires": { "fs.realpath": "^1.0.0", "inflight": "^1.0.4", "inherits": "2", - "minimatch": "^3.1.1", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "minimatch": "^5.0.1", + "once": "^1.3.0" } }, "has": { @@ -370,6 +566,18 @@ "resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz", "integrity": "sha512-QFLV0taWQOZtvIRIAdBChesmogZrtuXvVWsFHZTk2SU+anspqZ2vMnoLg7IE1+Uk16N19APic1BuF8bC8c2m5g==" }, + "http-errors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", + "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", + "requires": { + "depd": "2.0.0", + "inherits": "2.0.4", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "toidentifier": "1.0.1" + } + }, "https-proxy-agent": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", @@ -379,6 +587,14 @@ "debug": "4" } }, + "iconv-lite": { + "version": "0.4.24", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", + "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3" + } + }, "ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -407,6 +623,11 @@ "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" }, + "ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==" + }, "is-arrayish": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.2.1.tgz", @@ -427,6 +648,11 @@ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, "lru-cache": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", @@ -435,6 +661,16 @@ "yallist": "^4.0.0" } }, + "media-typer": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==" + }, + "merge-descriptors": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", + "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==" + }, "methods": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz", @@ -459,11 +695,11 @@ } }, "minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz", + "integrity": "sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg==", "requires": { - "brace-expansion": "^1.1.7" + "brace-expansion": "^2.0.1" } }, "mkdirp-classic": { @@ -476,6 +712,11 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "negotiator": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", + "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==" + }, "node-fetch": { "version": "2.6.7", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.7.tgz", @@ -489,6 +730,14 @@ "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.2.tgz", "integrity": "sha512-z+cPxW0QGUp0mcqcsgQyLVRDoXFQbXOwBaqyF7VIgI4TWNQsDHrBpUQslRmIfAoYWdYzs6UlKJtB2XJpTaNSpQ==" }, + "on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "requires": { + "ee-first": "1.1.1" + } + }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -516,11 +765,21 @@ "lines-and-columns": "^1.1.6" } }, + "parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==" + }, "path-is-absolute": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==" }, + "path-to-regexp": { + "version": "0.1.7", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz", + "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==" + }, "path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", @@ -536,6 +795,15 @@ "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==" }, + "proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "requires": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + } + }, "proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -588,6 +856,22 @@ "side-channel": "^1.0.4" } }, + "range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==" + }, + "raw-body": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz", + "integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==", + "requires": { + "bytes": "3.1.2", + "http-errors": "2.0.0", + "iconv-lite": "0.4.24", + "unpipe": "1.0.0" + } + }, "readable-stream": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz", @@ -609,6 +893,38 @@ "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", "requires": { "glob": "^7.1.3" + }, + "dependencies": { + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "requires": { + "brace-expansion": "^1.1.7" + } + } } }, "safe-buffer": { @@ -616,6 +932,11 @@ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, "semver": { "version": "7.3.8", "resolved": "https://registry.npmjs.org/semver/-/semver-7.3.8.tgz", @@ -624,6 +945,69 @@ "lru-cache": "^6.0.0" } }, + "send": { + "version": "0.18.0", + "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz", + "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==", + "requires": { + "debug": "2.6.9", + "depd": "2.0.0", + "destroy": "1.2.0", + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "etag": "~1.8.1", + "fresh": "0.5.2", + "http-errors": "2.0.0", + "mime": "1.6.0", + "ms": "2.1.3", + "on-finished": "2.4.1", + "range-parser": "~1.2.1", + "statuses": "2.0.1" + }, + "dependencies": { + "debug": { + "version": "2.6.9", + "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz", + "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==", + "requires": { + "ms": "2.0.0" + }, + "dependencies": { + "ms": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz", + "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==" + } + } + }, + "mime": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz", + "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==" + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + } + } + }, + "serve-static": { + "version": "1.15.0", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz", + "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==", + "requires": { + "encodeurl": "~1.0.2", + "escape-html": "~1.0.3", + "parseurl": "~1.3.3", + "send": "0.18.0" + } + }, + "setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" + }, "side-channel": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", @@ -634,6 +1018,11 @@ "object-inspect": "^1.9.0" } }, + "statuses": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", + "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==" + }, "string_decoder": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", @@ -695,11 +1084,25 @@ "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" }, + "toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==" + }, "tr46": { "version": "0.0.3", "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, + "type-is": { + "version": "1.6.18", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz", + "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==", + "requires": { + "media-typer": "0.3.0", + "mime-types": "~2.1.24" + } + }, "unbzip2-stream": { "version": "1.4.3", "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", @@ -709,16 +1112,31 @@ "through": "^2.3.8" } }, + "unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==" + }, "util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, + "utils-merge": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", + "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==" + }, "uuid": { "version": "9.0.0", "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==" }, + "vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==" + }, "webidl-conversions": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", diff --git a/package.json b/package.json index b654f22..e898891 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,10 @@ "author": "Edward Peterson", "license": "ISC", "dependencies": { + "body-parser": "^1.20.1", + "express": "^4.18.2", + "glob": "^8.0.3", + "lodash": "^4.17.21", "puppeteer": "^19.2.0", "superagent": "^8.0.3", "uuid": "^9.0.0" diff --git a/processors/bringatrailer.processor.js b/processors/bringatrailer.processor.js new file mode 100644 index 0000000..99a9355 --- /dev/null +++ b/processors/bringatrailer.processor.js @@ -0,0 +1,11 @@ +const genericBootstrapFactory = require("./generic-bootstrap") + +module.exports = { + execute: genericBootstrapFactory({ + pageLoadIndicator: '.gallery', + vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a', + carouselTrigger: '.gallery > a:nth-child(1)' + + }), + baseUrl: 'bringatrailer.com' +} \ No newline at end of file diff --git a/processors/classiccars.processor.js b/processors/classiccars.processor.js new file mode 100644 index 0000000..fc52f8b --- /dev/null +++ b/processors/classiccars.processor.js @@ -0,0 +1,11 @@ +const genericBootstrapFactory = require("./generic-bootstrap") + +module.exports = { + execute: genericBootstrapFactory({ + baseUrl: 'classiccars.com', + pageLoadIndicator: '#ListingCarousel', + vinSelector: 'li.p-vin > span:nth-child(2)', + carouselTrigger: 'div.swiper-slide-active > div > img.u-photo' + }), + baseUrl :'classiccars.com' +} \ No newline at end of file diff --git a/processors/generic-bootstrap.js b/processors/generic-bootstrap.js new file mode 100644 index 0000000..3da6901 --- /dev/null +++ b/processors/generic-bootstrap.js @@ -0,0 +1,105 @@ +const uuid = require('uuid'); +module.exports = function (processorConfig) { + + downloadGallery = async (vin) => { + var galleryUrls = []; + async function ensureCarouselVisible() { + var imgWrap; + do { + imgWrap = document.elementFromPoint(100, 100); + console.log(imgWrap.classList); + await delay(50); + } while (imgWrap.classList.contains('pswp__img--placeholder')); + + } + + function delay(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); + } + + // Find the active image, surround it in an anchor tag, then click it. + async function downloadImage(id) { + const imgWrap = document.elementFromPoint(300, 300); + const children = imgWrap.querySelectorAll('.pswp__img'); + const img = children[children.length - 1]; + console.log(img, imgWrap); + // debugger; + // Full image hasn't loaded yet + const src = imgWrap.src.split('?')[0]// get rid of querystring + return downloadSrc(src, id); + } + + function downloadSrc(src, id) { + galleryUrls.push({ + fileName: id, + url: src + }) + } + + function nextImage() { + document.querySelector('.pswp__button.pswp__button--arrow--right').click(); + } + + function getCounterValue() { + const [position, total] = document.querySelector('.pswp__counter').textContent.split('/'); + return parseInt(position.trim(), 10); + } + + function run() { + return new Promise(async (resolve, reject) => { + await ensureCarouselVisible(); + await delay(500); + const firstValue = getCounterValue(); + do { + await downloadImage(`${vin}-${getCounterValue()}`); + nextImage(); + await delay(100); + } while (getCounterValue() !== firstValue); + return resolve(galleryUrls) + }) + + } + + return new Promise((resolve, reject) => { + run().then(resolve); + }); + } + + return async function (page) { + console.log('Running generic boostrap extractor') + + // Type into search box. + // await page.type('.devsite-search-field', 'Headless Chrome'); + + // Wait for suggest overlay to appear and click "show all results". + const allResultsSelector = processorConfig.pageLoadIndicator; + await page.waitForSelector(allResultsSelector); + var vin; + + const vinSelector = processorConfig.vinSelector; + await page.waitForSelector(vinSelector).then(async () => { + let element = await page.$(vinSelector) + vin = await page.evaluate(el => el.textContent, element); + }).catch(error => { + console.error('Unable to grab VIN, falling back to UUID'); + vin = uuid.v4(); + }); + console.log(vin); + // await page.click(allResultsSelector); + + const client = await page.target().createCDPSession() + await client.send('Page.setDownloadBehavior', { + behavior: 'allow', + downloadPath: './images', + }) + const firstImageLinkSelector = processorConfig.carouselTrigger; + await page.click(firstImageLinkSelector); + await page.waitForSelector('.pswp__img') + console.log('Gallery is loaded, fetching URLS') + const galleryUrls = await page.evaluate(downloadGallery, vin); + + + + return galleryUrls + } +} \ No newline at end of file