You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
158 lines
4.8 KiB
JavaScript
158 lines
4.8 KiB
JavaScript
const puppeteer = require('puppeteer')
|
|
const fs = require('fs');
|
|
const superagent = require('superagent');
|
|
const path = require('path');
|
|
const uuid = require('uuid');
|
|
|
|
// Target listing URL, supplied on the command line as `url=<address>`.
let url = '';

// Every argument is inspected; if `url=` appears more than once the last one wins.
for (const arg of process.argv) {
  if (arg.startsWith('url=')) {
    url = arg.substring(4);
  }
}

if (url.length === 0) {
  // Usage error: report on stderr and exit non-zero so shells and CI can detect it.
  console.error('Need to supply url');
  process.exit(1);
}
|
|
|
|
|
|
/**
 * Per-site scraping settings, one entry per supported listing site.
 *
 *  - baseUrl:           substring looked for in the requested URL to pick the entry.
 *  - pageLoadIndicator: selector awaited after navigation before scraping starts.
 *  - vinSelector:       selector of the element whose text content is used as the VIN.
 *  - carouselTrigger:   selector clicked to open the photo gallery.
 */
const processors = [
  {
    baseUrl: 'bringatrailer.com',
    pageLoadIndicator: '.gallery',
    vinSelector: 'body > main > div.listing > div:nth-child(3) > div.column.column-right.column-right-force > div.essentials > div:nth-child(5) > ul > li:nth-child(1) > a',
    carouselTrigger: '.gallery > a:nth-child(1)',
  },
  {
    baseUrl: 'classiccars.com',
    pageLoadIndicator: '#ListingCarousel',
    vinSelector: 'li.p-vin > span:nth-child(2)',
    carouselTrigger: 'div.swiper-slide-active > div > img.u-photo',
  },
];
|
|
// Settings of the site that hosts `url`. Starts as null (not `{}`, which is
// truthy) so the unsupported-URL guard below can actually fire.
let processorConfig = null;

// Every entry is checked, so if several base URLs match the last one is kept.
for (const config of processors) {
  if (url.includes(config.baseUrl)) {
    processorConfig = config;
  }
}

if (!processorConfig) {
  // No known site matched: report on stderr and exit non-zero.
  console.error('Unsupported URL');
  process.exit(1);
}
|
|
|
|
/**
 * Streams one collected gallery image to disk.
 *
 * @param {{fileName: string, url: string}} image - Entry produced by the gallery walk.
 * @param {string} targetDir - Directory the file is written into.
 * @returns {Promise<void>} Resolves when the file has been fully written;
 *   rejects if either the HTTP request or the file stream reports an error.
 */
const saveImage = (image, targetDir) =>
  new Promise((resolve, reject) => {
    // Keep the extension found in the image URL.
    const fileType = path.extname(image.url);
    const stream = fs.createWriteStream(path.resolve(targetDir, `${image.fileName}${fileType}`));
    stream.on('finish', resolve);
    // Without these handlers a failed download would leave the promise pending forever.
    stream.on('error', reject);

    const request = superagent.get(image.url);
    request.on('error', reject);
    request.pipe(stream);
  });

/**
 * Opens `url` in headless Chrome, reads the VIN, opens the photo gallery,
 * collects every image URL and downloads the images into ./images.
 *
 * Relies on the file-level `url`, `processorConfig` and `downloadGallery`.
 *
 * @returns {Promise<void>} Rejects if navigation, gallery collection or any
 *   download fails; the browser is closed in every case.
 */
const run = async () => {
  const browser = await puppeteer.launch({
    headless: true,
  });

  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Wait until the listing page has rendered.
    await page.waitForSelector(processorConfig.pageLoadIndicator);

    // The VIN prefixes every file name. Reading it is best effort: any
    // failure (or an empty value) falls back to a random UUID.
    const vinSelector = processorConfig.vinSelector;
    let vin = '';
    try {
      await page.waitForSelector(vinSelector);
      const element = await page.$(vinSelector);
      const text = await page.evaluate(el => el.textContent, element);
      // Surrounding whitespace from the markup must not end up in file names.
      vin = text.trim();
    } catch (error) {
      console.error(error.message);
    }
    if (vin.length === 0) {
      console.error('Unable to grab VIN, falling back to UUID');
      vin = uuid.v4();
    }
    console.log(vin);

    // Browser-side download settings; the images themselves are fetched
    // with superagent further down.
    const client = await page.target().createCDPSession();
    await client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      downloadPath: './images',
    });

    // Open the gallery and wait for its first image element.
    await page.click(processorConfig.carouselTrigger);
    await page.waitForSelector('.pswp__img');
    console.log('Gallery is loaded, fetching URLS');

    // downloadGallery executes inside the page and returns {fileName, url} entries.
    const galleryUrls = await page.evaluate(downloadGallery, vin);
    console.log('Done collecting URLS', galleryUrls.length);

    // Make sure the output directory exists before any stream is opened.
    const imagesDir = path.resolve('images');
    fs.mkdirSync(imagesDir, { recursive: true });

    await Promise.all(galleryUrls.map(image => saveImage(image, imagesDir)));
    console.log('URLS done downloading');
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
};
|
|
|
|
/**
 * Steps through the open photo gallery and collects the URL of every image.
 *
 * This function is handed to `page.evaluate`, so it executes inside the page:
 * it may only use browser globals and the helpers nested inside it.
 *
 * @param {string} vin - Prefix for the generated file names (`<vin>-<position>`).
 * @returns {Promise<Array<{fileName: string, url: string}>>} One entry per
 *   gallery position, in visiting order, with the query string removed from
 *   each URL. Rejects if an expected gallery element is missing.
 */
async function downloadGallery(vin) {
  const galleryUrls = [];

  function delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  // Poll until the element at (100, 100) is no longer the gallery placeholder.
  async function ensureCarouselVisible() {
    let element;
    do {
      element = document.elementFromPoint(100, 100);
      await delay(50);
    } while (element.classList.contains('pswp__img--placeholder'));
  }

  // Record the image currently shown at (300, 300) under the given file name.
  function recordCurrentImage(id) {
    const element = document.elementFromPoint(300, 300);
    // Normally the sampled element is the image itself. If a wrapper was hit
    // instead, use the last `.pswp__img` nested inside it.
    let img = element;
    if (typeof element.src !== 'string') {
      const nested = element.querySelectorAll('.pswp__img');
      img = nested[nested.length - 1];
    }
    if (!img || typeof img.src !== 'string') {
      throw new Error('No gallery image found at the sampling point');
    }
    galleryUrls.push({
      fileName: id,
      url: img.src.split('?')[0], // get rid of querystring
    });
  }

  function nextImage() {
    document.querySelector('.pswp__button.pswp__button--arrow--right').click();
  }

  // The counter text has the form "<position> / <total>"; only the position is needed.
  function getCounterValue() {
    const [position] = document.querySelector('.pswp__counter').textContent.split('/');
    return Number.parseInt(position.trim(), 10);
  }

  await ensureCarouselVisible();
  await delay(500);

  // Advance until the counter returns to the position we started from.
  const firstValue = getCounterValue();
  do {
    recordCurrentImage(`${vin}-${getCounterValue()}`);
    nextImage();
    await delay(100);
  } while (getCounterValue() !== firstValue);

  return galleryUrls;
}
|
|
|
|
// Start the scrape. A failure is reported and turned into a non-zero exit
// code instead of being left as an unhandled promise rejection.
run().catch((error) => {
  console.error(error);
  process.exit(1);
});