const puppeteer = require('puppeteer');
const superagent = require('superagent');
const path = require('path');
const express = require('express');
const glob = require('glob');
const _ = require('lodash');
const bodyParser = require('body-parser');

const app = express();

// Processor modules discovered at startup. Each one is used below through
// two members: `baseUrl` (compared against the requested hostname) and
// `execute(page)` (resolves to an array of objects carrying a `url`).
const processors = [];

/**
 * Removes a leading "www." (case-insensitive) so hostnames can be compared
 * regardless of whether the prefix is present.
 *
 * @param {string} hostname
 * @returns {string}
 */
function stripWww(hostname) {
  return hostname.replace(/^www\./i, '');
}

// Load the processor files.
glob(path.resolve(__dirname, 'processors/*.processor.js'), (error, matches) => {
  if (error) {
    // Without this guard `matches` is undefined and `matches.length` throws.
    console.log('Failed to load processors', error);
    return;
  }
  console.log(matches);
  _.forEach(matches, (file) => {
    const processor = require(path.resolve(__dirname, file));
    processors.push(processor);
  });
  console.log(`${matches.length} processors loaded`);
});

app.use(bodyParser.json());

/**
 * POST /convertGalleryToHar
 * Body: { url: string }
 *
 * Picks the processor whose `baseUrl` matches the URL's hostname, runs it,
 * and answers with `{ log: { entries: [...] } }` where each entry is the
 * result of downloading one gallery image.
 *
 * Responds 400 when the URL is missing/invalid or no processor matches,
 * and 500 when collecting the gallery fails.
 */
app.post('/convertGalleryToHar', async (req, res) => {
  const url = req.body && req.body.url;
  console.log(url);

  // get processor
  let searchableHostname;
  try {
    searchableHostname = stripWww(new URL(url).hostname);
  } catch (error) {
    // `new URL` throws on missing or malformed input; inside an async
    // handler that would otherwise leave the request hanging.
    return res.status(400).json({ message: 'Invalid or missing url' });
  }
  console.log('Searching for hostname:', searchableHostname);

  const processor = _.find(
    processors,
    (candidate) => searchableHostname === stripWww(candidate.baseUrl)
  );
  if (!processor) {
    return res.status(400).json({
      message: 'Could not find processor for url',
    });
  }
  console.log('Processor found', processor.baseUrl);

  try {
    const payloads = await run(url, processor);
    return res.status(200).json({
      log: {
        entries: payloads,
      },
    });
  } catch (error) {
    console.log(error);
    // Error instances serialise to `{}` under JSON.stringify, so send the
    // message explicitly.
    const message = error && error.message ? error.message : String(error);
    return res.status(500).json({ message });
  }
});

/**
 * GET /supportedSites
 * Lists the `baseUrl` of every loaded processor.
 */
app.get('/supportedSites', async (req, res) => {
  res.status(200).json(processors.map((p) => p.baseUrl));
});

app.listen(2667);

/**
 * Downloads a single gallery image and wraps it as a base64 payload.
 *
 * Best-effort: any failure (network error, non-200 status, malformed entry)
 * is logged and yields an empty object so that one bad image does not fail
 * the whole gallery.
 *
 * @param {{url: string}} image - Gallery entry produced by a processor.
 * @returns {Promise<object>} `{ response: { content: { mimeType, encoding, text } } }`
 *   on success, `{}` otherwise.
 */
async function downloadImage(image) {
  try {
    const response = await superagent.get(image.url).responseType('blob');
    if (response.statusCode !== 200) {
      return {};
    }
    return {
      response: {
        content: {
          mimeType: response.headers['content-type'],
          encoding: 'base64',
          text: response.body.toString('base64'),
        },
      },
    };
  } catch (error) {
    console.log('Failed to download image', image && image.url, error);
    return {};
  }
}

/**
 * Opens `url` in a headless browser, lets `processor` collect the gallery
 * image URLs from the page, and downloads all of them in parallel.
 *
 * @param {string} url - Page to load.
 * @param {{execute: function(object): Promise<Array<{url: string}>>}} processor
 *   Processor whose `execute(page)` resolves to the gallery entries.
 * @returns {Promise<object[]>} One payload per gallery entry, in order;
 *   entries that failed to download are `{}`.
 * @throws Propagates errors from launching the browser, loading the page or
 *   running the processor. The browser is closed in every case.
 */
async function run(url, processor) {
  const browser = await puppeteer.launch({
    headless: true,
  });

  try {
    const page = await browser.newPage();
    await page.setViewport({ width: 1200, height: 800 });
    await page.setUserAgent(
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    );

    console.log('Loading page...');
    await page.goto(url, {
      timeout: 60000,
    });
    console.log('page loaded');

    const galleryUrls = await processor.execute(page);
    console.log('Done collecting URLS', galleryUrls.length);

    const payloads = await Promise.all(
      galleryUrls.map((image) => downloadImage(image))
    );
    console.log('URLS done downloading');

    return payloads;
  } finally {
    // Always release the Chromium process, including when navigation or the
    // processor throws.
    await browser.close();
  }
}