const puppeteer = require('puppeteer');
const fs = require('fs');
const superagent = require('superagent');
const path = require('path');

// Directory (relative to the current working directory) that receives the images.
const IMAGE_DIR = 'images';

/**
 * Per-site scraping configuration.
 *
 * - baseUrl:           substring that must occur in the requested URL to select the entry.
 * - pageLoadIndicator: selector awaited right after navigation.
 * - vinSelector:       selector of the element whose text content is used as the
 *                      file-name prefix (logged as `vin`).
 * - carouselTrigger:   selector that is clicked to open the image gallery overlay.
 */
const processors = [
  {
    baseUrl: 'bringatrailer.com',
    pageLoadIndicator: '.gallery',
    vinSelector:
      'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a',
    carouselTrigger: '.gallery > a:nth-child(1)',
  },
  {
    baseUrl: 'classiccars.com',
    pageLoadIndicator: '#ListingCarousel',
    vinSelector: 'li.p-vin > span:nth-child(2)',
    carouselTrigger: 'div.swiper-slide-active > div > img.u-photo',
  },
];

/**
 * Extracts the target URL from the command line.
 *
 * Every argument is echoed to stdout; an argument of the form `url=<value>`
 * supplies the URL. When several are given, the last one wins.
 *
 * @param {string[]} argv - Raw process arguments.
 * @returns {string} The URL, or an empty string when none was supplied.
 */
function parseUrlArg(argv) {
  let found = '';
  for (const val of argv) {
    console.log(val);
    if (val.startsWith('url=')) {
      found = val.substring(4);
    }
  }
  return found;
}

const url = parseUrlArg(process.argv);
if (!url) {
  console.log('Need to supply url');
  process.exit(1);
}

// `find` yields undefined for an unknown site, so the guard below can actually
// fire (the previous `{}` default was always truthy).
const processorConfig = processors.find((config) => url.includes(config.baseUrl));
if (!processorConfig) {
  console.log('Unsupported URL');
  process.exit(1);
}

/**
 * Downloads one gallery image into IMAGE_DIR.
 *
 * @param {{fileName: string, url: string}} image - Target base name and source URL.
 * @returns {Promise<void>} Resolves when the file has been fully written;
 *   rejects on a request or write error instead of hanging.
 */
function saveImage(image) {
  return new Promise((resolve, reject) => {
    const fileType = path.extname(image.url);
    const target = path.resolve(IMAGE_DIR, `${image.fileName}${fileType}`);
    const stream = fs.createWriteStream(target);
    stream.on('finish', resolve);
    stream.on('error', reject);
    superagent.get(image.url).on('error', reject).pipe(stream);
  });
}

/**
 * Collects the URL of every image in the opened gallery overlay.
 *
 * This function is handed to `page.evaluate`, so it executes inside the page
 * and must be fully self-contained: it may only use its argument, its own
 * nested helpers and browser globals.
 *
 * @param {string} vin - Prefix for the generated file names (`<vin>-<position>`).
 * @returns {Promise<Array<{fileName: string, url: string}>>} One entry per visited image.
 * @throws {Error} When the gallery counter or the current image source cannot be read.
 */
async function downloadGallery(vin) {
  const galleryUrls = [];

  function delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  // Polls until the element at (100, 100) is no longer a placeholder image.
  async function ensureCarouselVisible() {
    let imgWrap;
    do {
      imgWrap = document.elementFromPoint(100, 100);
      console.log(imgWrap.classList);
      await delay(50);
    } while (imgWrap.classList.contains('pswp__img--placeholder'));
  }

  // Records the source of the image currently displayed at (300, 300).
  function recordCurrentImage(id) {
    const imgWrap = document.elementFromPoint(300, 300);
    // Normally the element under the point is the image itself; if it is a
    // wrapper instead, fall back to the last `.pswp__img` nested inside it.
    const nested = imgWrap.querySelectorAll('.pswp__img');
    const img = imgWrap.src ? imgWrap : nested[nested.length - 1];
    if (!img || !img.src) {
      throw new Error(`No image source found for ${id}`);
    }
    const src = img.src.split('?')[0]; // get rid of querystring
    galleryUrls.push({ fileName: id, url: src });
  }

  function nextImage() {
    document.querySelector('.pswp__button.pswp__button--arrow--right').click();
  }

  // Parses the "<position>/<total>" text of the gallery counter.
  function readCounter() {
    const [position, total] = document
      .querySelector('.pswp__counter')
      .textContent.split('/');
    return {
      position: parseInt(position.trim(), 10),
      total: parseInt(total, 10),
    };
  }

  await ensureCarouselVisible();
  await delay(500);

  const { position: firstValue, total } = readCounter();
  if (Number.isNaN(firstValue)) {
    throw new Error('Could not read gallery counter');
  }
  // Safety bound: never collect more entries than the counter reports, so the
  // loop still terminates if the counter never returns to its first value.
  const maxImages = Number.isNaN(total) ? Infinity : total;

  do {
    recordCurrentImage(`${vin}-${readCounter().position}`);
    nextImage();
    await delay(100);
  } while (readCounter().position !== firstValue && galleryUrls.length < maxImages);

  return galleryUrls;
}

/**
 * Opens the listing, reads the VIN, walks the gallery and downloads every
 * image into IMAGE_DIR. The browser is closed even when a step fails.
 *
 * @returns {Promise<void>}
 */
const run = async () => {
  const browser = await puppeteer.launch({
    headless: true,
  });
  try {
    const page = await browser.newPage();
    await page.goto(url);

    await page.waitForSelector(processorConfig.pageLoadIndicator);

    const vinSelector = processorConfig.vinSelector;
    await page.waitForSelector(vinSelector);
    const element = await page.$(vinSelector);
    const vin = await page.evaluate((el) => el.textContent, element);
    console.log(vin);

    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: `./${IMAGE_DIR}`,
    });

    await page.click(processorConfig.carouselTrigger);
    await page.waitForSelector('.pswp__img');
    console.log('Gallery is loaded, fetching URLS');

    const galleryUrls = await page.evaluate(downloadGallery, vin);
    console.log('Done collecting URLS', galleryUrls.length);

    // The write streams fail if the target directory is missing.
    fs.mkdirSync(IMAGE_DIR, { recursive: true });
    await Promise.all(galleryUrls.map((image) => saveImage(image)));
    console.log('URLS done downloading');
  } finally {
    await browser.close();
  }
};

run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});