You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

153 lines
4.6 KiB
JavaScript

const puppeteer = require('puppeteer')
const fs = require('fs');
const superagent = require('superagent');
const path = require('path');
// Address of the listing page to scrape, taken from a `url=<address>`
// command-line argument. If the argument is given several times, the last
// occurrence wins.
let url = '';
for (const arg of process.argv) {
  if (arg.startsWith('url=')) {
    url = arg.substring('url='.length);
  }
}
if (url.length === 0) {
  // Usage error: report on stderr and exit non-zero so callers can detect it.
  console.error('Need to supply url');
  process.exit(1);
}
// Per-site scraping configuration:
//   baseUrl           - text that must occur in the requested url for the entry to apply
//   pageLoadIndicator - selector awaited after navigation, before anything else is read
//   vinSelector       - selector of the element whose text content is used as the VIN
//   carouselTrigger   - selector of the element clicked to open the image gallery
const processors = [{
  baseUrl: 'bringatrailer.com',
  pageLoadIndicator: '.gallery',
  vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a',
  carouselTrigger: '.gallery > a:nth-child(1)'
}, {
  baseUrl: 'classiccars.com',
  pageLoadIndicator: '#ListingCarousel',
  vinSelector: 'li.p-vin > span:nth-child(2)',
  carouselTrigger: 'div.swiper-slide-active > div > img.u-photo'
}];

// Select the entry whose baseUrl occurs in the requested url. When several
// entries match, the last one in the list wins; when none match the result is
// undefined, which the guard below turns into a clean exit.
const processorConfig = processors
  .filter((config) => url.includes(config.baseUrl))
  .pop();
if (!processorConfig) {
  console.error('Unsupported URL');
  process.exit(1);
}
/**
 * Downloads a single gallery image into `directory`.
 *
 * @param {{fileName: string, url: string}} image - Entry as returned by
 *   downloadGallery; the file extension is taken from `image.url`.
 * @param {string} directory - Existing directory that receives the file.
 * @returns {Promise<void>} Resolves once the file has been completely
 *   written; rejects if either the HTTP request or the file write fails.
 */
const saveImage = (image, directory) => new Promise((resolve, reject) => {
  const fileType = path.extname(image.url);
  const stream = fs.createWriteStream(path.join(directory, `${image.fileName}${fileType}`));
  stream.on('finish', resolve);
  stream.on('error', reject);
  const request = superagent.get(image.url);
  // Without this handler a failed request would leave the promise pending.
  request.on('error', reject);
  request.pipe(stream);
});

/**
 * Opens the configured listing page in headless Chrome, reads the VIN, opens
 * the image gallery, collects every image address and saves the images to the
 * `images` directory (resolved against the current working directory).
 *
 * @returns {Promise<void>} Resolves when all images are saved. Rejects if any
 *   navigation, selector wait, or download fails; the browser is closed in
 *   either case.
 */
const run = async () => {
  const browser = await puppeteer.launch({
    headless: true
  });
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Wait until the listing has rendered before looking for the VIN.
    await page.waitForSelector(processorConfig.pageLoadIndicator);
    const vinSelector = processorConfig.vinSelector;
    await page.waitForSelector(vinSelector);
    const vinElement = await page.$(vinSelector);
    const vin = await page.evaluate(el => el.textContent, vinElement);
    console.log(vin);

    // Allow browser-initiated downloads into ./images.
    // NOTE(review): the images below are fetched with superagent rather than
    // by the browser - confirm whether this setting is still required.
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: './images',
    });

    // Open the gallery and wait for its first image element.
    await page.click(processorConfig.carouselTrigger);
    await page.waitForSelector('.pswp__img');
    console.log('Gallery is loaded, fetching URLS');

    // downloadGallery executes inside the page and returns plain data.
    const galleryUrls = await page.evaluate(downloadGallery, vin);
    console.log('Done collecting URLS', galleryUrls.length);

    // Make sure the target directory exists before opening write streams.
    const imageDir = path.resolve('images');
    fs.mkdirSync(imageDir, { recursive: true });
    await Promise.all(galleryUrls.map((image) => saveImage(image, imageDir)));
    console.log('URLS done downloading');
  } finally {
    // Always release the browser process, even when a step above failed.
    await browser.close();
  }
};
/**
 * Steps through the open image gallery once and collects the address of every
 * image it shows.
 *
 * This function is handed to `page.evaluate`, so it executes inside the
 * browser page. It must stay self-contained: it may use `document` and its own
 * local helpers, but nothing from the surrounding Node.js module.
 *
 * @param {string} vin - Prefix for the generated file names.
 * @returns {Promise<Array<{fileName: string, url: string}>>} One entry per
 *   visited image; `fileName` is `<vin>-<counter position>` and `url` is the
 *   image address without its query string.
 * @throws {Error} If the gallery counter cannot be parsed. Missing gallery
 *   elements surface as a TypeError rejection rather than a hang.
 */
const downloadGallery = async (vin) => {
  const galleryUrls = [];

  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Poll until the element at (100, 100) no longer carries the placeholder class.
  const ensureCarouselVisible = async () => {
    let topElement;
    do {
      topElement = document.elementFromPoint(100, 100);
      await delay(50);
    } while (topElement.classList.contains('pswp__img--placeholder'));
  };

  // Record the image currently displayed at (300, 300) under `fileName`.
  const recordCurrentImage = (fileName) => {
    const currentImage = document.elementFromPoint(300, 300);
    // Get rid of the query string.
    const url = currentImage.src.split('?')[0];
    galleryUrls.push({ fileName, url });
  };

  const showNextImage = () => {
    document.querySelector('.pswp__button.pswp__button--arrow--right').click();
  };

  // The counter text is split on '/'; the part before it is the position.
  const getCounterValue = () => {
    const counterText = document.querySelector('.pswp__counter').textContent;
    const [position] = counterText.split('/');
    const value = parseInt(position.trim(), 10);
    if (Number.isNaN(value)) {
      // NaN never equals the start value, so the loop below would never end.
      throw new Error(`Unexpected gallery counter text: "${counterText}"`);
    }
    return value;
  };

  await ensureCarouselVisible();
  // NOTE(review): presumably gives the gallery time to settle - confirm.
  await delay(500);

  // Advance until the counter returns to where it started.
  const firstValue = getCounterValue();
  do {
    recordCurrentImage(`${vin}-${getCounterValue()}`);
    showNextImage();
    await delay(100);
  } while (getCounterValue() !== firstValue);

  return galleryUrls;
};
// Script entry point. Report any failure and exit non-zero instead of leaving
// the rejection unhandled.
run().catch((error) => {
  console.error(error);
  process.exit(1);
});