You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123 lines
3.6 KiB
JavaScript

const puppeteer = require('puppeteer')
const superagent = require('superagent');
const path = require('path');
const express = require('express');
const glob = require('glob');
const _ = require('lodash');
const app = express();
var bodyParser = require('body-parser')
const CronJob = require('cron').CronJob;
const processors = [];
// Load every `processors/*.processor.js` module next to this file and register
// it in the shared `processors` list used by the HTTP handlers.
glob(path.resolve(__dirname, 'processors/*.processor.js'), (error, matches) => {
    if (error) {
        // On failure glob does not supply a match list; bail out instead of
        // dereferencing `matches` and throwing inside the callback.
        console.error('Failed to load processors', error);
        return;
    }
    console.log(matches);
    _.forEach(matches, file => {
        const processor = require(path.resolve(__dirname, file));
        processors.push(processor);
    });
    console.log(`${matches.length} processors loaded`);
});
// Load every `crawlers/*.crawler.js` module next to this file and schedule its
// `run` function on the cron expression the module exposes as `cronString`.
glob(path.resolve(__dirname, 'crawlers/*.crawler.js'), (error, matches) => {
    if (error) {
        // On failure glob does not supply a match list; bail out instead of
        // dereferencing `matches` and throwing inside the callback.
        console.error('Failed to load crawlers', error);
        return;
    }
    console.log(matches);
    _.forEach(matches, file => {
        const crawler = require(path.resolve(__dirname, file));
        // NOTE(review): `crawler.run` is handed over unbound; confirm crawlers
        // do not rely on `this` being the crawler module.
        const cronJob = new CronJob(crawler.cronString, crawler.run);
        cronJob.start();
    });
    console.log(`${matches.length} crawlers loaded`);
});
app.use(bodyParser.json());

/**
 * POST /convertGalleryToHar
 *
 * Expects a JSON body with a `url` field. Picks the loaded processor whose
 * `baseUrl` equals the URL's hostname (a leading "www." is ignored on both
 * sides), runs the scrape, and responds with `{ vin, mileage, log: { entries } }`.
 *
 * Responses:
 *   400 - `url` is missing/unparseable, or no processor matches its hostname.
 *   500 - the scrape itself failed.
 */
app.post('/convertGalleryToHar', async (req, res) => {
    const url = req.body && req.body.url;
    console.log(url);

    // `new URL` throws on missing or malformed input. Handle that here: a
    // rejection escaping this async handler would leave the request unanswered.
    let searchableHostname;
    try {
        searchableHostname = (new URL(url)).hostname.replace(/^www\./i, '');
    } catch (error) {
        return res.status(400).json({
            message: 'Invalid or missing url'
        });
    }

    // get processor
    console.log('Searching for hostname:', searchableHostname);
    const processor = _.find(
        processors,
        (candidate) => searchableHostname === candidate.baseUrl.replace(/^www\./i, '')
    );
    if (!processor) {
        return res.status(400).json({
            message: 'Could not find processor for url'
        });
    }
    console.log('Processor found', processor.baseUrl);

    try {
        const payloads = await run(url, processor);
        res.status(200).json({
            vin: payloads.vin,
            mileage: payloads.mileage,
            log: {
                entries: payloads.payloads
            }
        });
    } catch (error) {
        console.log(error);
        // Error instances have no enumerable fields and would serialize to `{}`;
        // send the message explicitly. Non-Error values pass through unchanged.
        const body = error instanceof Error ? { message: error.message } : error;
        res.status(500).json(body);
    }
});
// GET /supportedSites - respond with the `baseUrl` of every loaded processor.
app.get('/supportedSites', async (req, res) => {
    const baseUrls = [];
    for (const loaded of processors) {
        baseUrls.push(loaded.baseUrl);
    }
    res.status(200).json(baseUrls);
});

// Start the HTTP server on port 2667.
app.listen(2667);
/**
 * Download a single gallery image and wrap it as a HAR-style entry.
 *
 * Never rejects: a non-200 status or any failure while fetching or encoding is
 * logged and yields an empty object, so one bad image cannot fail the batch.
 *
 * @param {{url: string}} image - item produced by `processor.execute`.
 * @returns {Promise<Object>} `{ response: { content: { mimeType, encoding, text } } }`
 *   with the body base64-encoded, or `{}` when the image could not be fetched.
 */
async function downloadImage(image) {
    try {
        const response = await superagent.get(image.url).responseType('blob');
        if (response.statusCode !== 200) {
            console.log("Invalid status code", response.statusCode, 'for', image.url);
            return {};
        }
        console.log('Resolving', image.url);
        return {
            response: {
                content: {
                    mimeType: response.headers["content-type"],
                    encoding: 'base64',
                    text: response.body.toString('base64')
                }
            }
        };
    } catch (error) {
        console.error(error);
        return {};
    }
}

/**
 * Open `url` in headless Chromium, let `processor` extract the VIN, mileage and
 * gallery image URLs from the page, then download every image.
 *
 * @param {string} url - page to load (navigation timeout is 60 seconds).
 * @param {Object} processor - must provide async `parseVIN(page)`,
 *   `parseMileage(page)` and `execute(page)`; `execute` must resolve to an array
 *   of objects carrying a `url` property.
 * @returns {Promise<{vin: *, payloads: Object[], mileage: *}>} parsed fields plus
 *   one payload per gallery URL, in the same order (failed downloads are `{}`).
 * @throws whatever browser launch, navigation or the processor throws; the
 *   browser is closed before the error propagates.
 */
async function run(url, processor) {
    const browser = await puppeteer.launch({
        headless: true
    });
    try {
        const page = await browser.newPage();
        await page.setViewport({
            width: 1200,
            height: 800
        });
        await page.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36");
        console.log('Loading page...');
        await page.goto(url, { timeout: 60000 });
        console.log('page loaded');
        console.log('attempting to parse fields');
        const vin = await processor.parseVIN(page);
        console.log('parsed VIN:', vin);
        const mileage = await processor.parseMileage(page);
        console.log('parsed Mileage:', mileage);
        const galleryUrls = await processor.execute(page);
        await page.close();
        console.log('Done collecting URLS', galleryUrls.length);
        const payloads = await Promise.all(galleryUrls.map(image => downloadImage(image)));
        console.log('URLS done downloading');
        return {
            vin,
            payloads,
            mileage
        };
    } finally {
        // Always release the browser, including when navigation or parsing
        // throws; otherwise every failed request leaves a Chromium process behind.
        await browser.close();
    }
}
// run();