From 79c07496061e63b11393c46e4f512996d198408f Mon Sep 17 00:00:00 2001
From: Edward Peterson
Date: Thu, 17 Nov 2022 17:01:32 -0500
Subject: [PATCH] Added new processors

---
Reviewer notes (free text between the three-dash line and the first
"diff --git" line is not part of the commit message):

* Layout: this patch had lost its line breaks and indentation. Both are
  restored below using 4-space indentation; context-line whitespace is a
  best-effort reconstruction, so try `git apply --ignore-whitespace` if a
  hunk is rejected.
* index.js: the processor lookup now parses the request url once, inside a
  guard, and matches the bare domain or any subdomain of it. The earlier
  form compared `new URL(url).hostname` to `baseUrl` with `===`, which
  rejected `www.` hosts and threw (unanswered) on a missing/malformed url.
  Hunk headers and the diffstat were recomputed for this revision; the
  post-image blob id on the index.js "index" line predates it.
* Processors: `this.baseUrl + src` produces a scheme-less string and relies
  on `execute` being invoked as a method of the exported object. Left as-is
  because the consumer is not part of this patch - confirm against `run`.

 index.js                                      | 18 ++++++++++++++++--
 processors/davidsclassiccars.processor.js     | 17 +++++++++++++++++
 processors/generic-bootstrap.js               | 10 ----------
 processors/hemmings.processor.js              | 12 ++++++++++++
 processors/topclassiccarsforsale.processor.js | 15 +++++++++++++++
 5 files changed, 60 insertions(+), 12 deletions(-)
 create mode 100644 processors/davidsclassiccars.processor.js
 create mode 100644 processors/hemmings.processor.js
 create mode 100644 processors/topclassiccarsforsale.processor.js

diff --git a/index.js b/index.js
index 2ffe0f6..a3b4002 100644
--- a/index.js
+++ b/index.js
@@ -22,14 +22,25 @@ app.use(bodyParser.json())
 app.post('/convertGalleryToHar', async (req, res) => {
     const url = req.body.url;
     console.log(url);
+
+    // new URL() throws on a missing or malformed url. Catch that here and fall
+    // through to the 400 below rather than rejecting inside this async handler.
+    let hostname = '';
+    try {
+        hostname = new URL(url).hostname;
+    } catch (err) {
+        console.log('Could not parse url', err.message);
+    }
     // get processor
-    const processor = _.find(processors, (processor) => url.includes(processor.baseUrl));
+    // Match the bare domain or any subdomain of it (e.g. www.<baseUrl>).
+    const processor = _.find(processors, (processor) =>
+        hostname === processor.baseUrl || hostname.endsWith(`.${processor.baseUrl}`));
     if (!processor) {
         return res.status(400).json({
             message: 'Could not find processor for url'
         })
     }
-    console.log('Processor found');
+    console.log('Processor found', processor.baseUrl);
     try {
         const payloads = await run(url, processor);
         res.status(200).json({
@@ -44,6 +55,9 @@ app.post('/convertGalleryToHar', async (req, res) => {
     }
 })
 
+app.get('/supportedSites', async(req, res) => {
+    res.status(200).json(processors.map(p => p.baseUrl));
+})
 app.listen(2667);
 
 async function run(url, processor) {
diff --git a/processors/davidsclassiccars.processor.js b/processors/davidsclassiccars.processor.js
new file mode 100644
index 0000000..edfc183
--- /dev/null
+++ b/processors/davidsclassiccars.processor.js
@@ -0,0 +1,17 @@
+module.exports = {
+    baseUrl: 'davidsclassiccars.com',
+    execute: async function(page) {
+        const pageLoadIndicator = '.bx-viewport';
+        await page.waitForSelector(pageLoadIndicator);
+        const imageSelector = '.carimage > img';
+        const images = await page.$$(imageSelector);
+        console.log(`Found ${images.length} images...`)
+        const sources = await Promise.all(images.map(async carouselItem => {
+            const src = await page.evaluate(el => el.getAttribute('src'), carouselItem);
+            // console.log(src);
+            return { url: this.baseUrl + src };
+        }));
+        return sources;
+
+    }
+}
\ No newline at end of file
diff --git a/processors/generic-bootstrap.js b/processors/generic-bootstrap.js
index 3da6901..8796930 100644
--- a/processors/generic-bootstrap.js
+++ b/processors/generic-bootstrap.js
@@ -68,10 +68,6 @@ module.exports = function (processorConfig) {
     return async function (page) {
         console.log('Running generic boostrap extractor')
 
-        // Type into search box.
-        // await page.type('.devsite-search-field', 'Headless Chrome');
-
-        // Wait for suggest overlay to appear and click "show all results".
         const allResultsSelector = processorConfig.pageLoadIndicator;
         await page.waitForSelector(allResultsSelector);
         var vin;
@@ -85,13 +81,7 @@ module.exports = function (processorConfig) {
             vin = uuid.v4();
         });
         console.log(vin);
-        // await page.click(allResultsSelector);
 
-        const client = await page.target().createCDPSession()
-        await client.send('Page.setDownloadBehavior', {
-            behavior: 'allow',
-            downloadPath: './images',
-        })
         const firstImageLinkSelector = processorConfig.carouselTrigger;
         await page.click(firstImageLinkSelector);
         await page.waitForSelector('.pswp__img')
diff --git a/processors/hemmings.processor.js b/processors/hemmings.processor.js
new file mode 100644
index 0000000..a195f94
--- /dev/null
+++ b/processors/hemmings.processor.js
@@ -0,0 +1,12 @@
+const genericBootstrapFactory = require("./generic-bootstrap")
+
+module.exports = {
+    execute: genericBootstrapFactory({
+        baseUrl: 'hemmings.com',
+        pageLoadIndicator: '#galleries',
+        vinSelector: '.leading-loose > li:nth-child(2) > span:nth-child(2)',
+        carouselTrigger: '.bg-center'
+    }),
+    baseUrl :'hemmings.com'
+}
+
diff --git a/processors/topclassiccarsforsale.processor.js b/processors/topclassiccarsforsale.processor.js
new file mode 100644
index 0000000..95f41d3
--- /dev/null
+++ b/processors/topclassiccarsforsale.processor.js
@@ -0,0 +1,15 @@
+module.exports = {
+    baseUrl: 'topclassiccarsforsale.com',
+    async execute(page) {
+        const gallerySelector = '.bx-viewport'
+        const imageSelector = '.bx-viewport > ul > li:not(.bx-clone) > img';
+        await page.waitForSelector(gallerySelector);
+        const images = await page.$$(imageSelector);
+        const sources = await Promise.all(images.map(async carouselItem => {
+            const src = await page.evaluate(el => el.getAttribute('src'), carouselItem);
+            console.log(src);
+            return { url: this.baseUrl + src };
+        }));
+        return sources;
+    }
+}
\ No newline at end of file