Skip to content
Snippets Groups Projects
Commit 159bc7c2 authored by Iain Bryson's avatar Iain Bryson
Browse files

Add jobs for zip and getpdf, along with purging of jobs.

parent b3efb530
Branches
No related tags found
No related merge requests found
#!/bin/bash
# NOTE(review): the duplicate shebang below is inert (bash treats it as a
# comment); only the first line selects the interpreter.
#!/usr/bin/bash
# /get-pdf.sh 01005 course.zip <jobid>
# Builds course PDFs from a sharelatex project zip inside /tmp/<course>/<jobid>.
course=$1
course_zip=$2
jobid=$3
# Per-job scratch directory; wiped and recreated on every run.
project="/tmp/$course/$jobid"
# Force UTF-8 so pdflatex/perl handle non-ASCII file contents consistently.
export LANG=en_US.UTF-8
export LANGUAGE=en_US.UTF-8
export LC_ALL=en_US.UTF-8
rm -rf $project
mkdir -p $project
# pdflatex invocation used everywhere below: non-interactive, stop on error.
pdflatex="/usr/local/texlive/2017/bin/x86_64-linux/pdflatex -halt-on-error -interaction=nonstopmode"
echo "/get-pdf.sh $course $course_zip $jobid"
# NOTE(review): this early exit makes everything below unreachable —
# presumably a debugging stub left in; confirm before shipping.
exit;
# ---- Download the enote as zip
unzip $course_zip -d $project
if [[ $? != 0 ]]; then
exit 1
fi
cd $project
# ---- Make each chapter in sections/ a pdf with working cross references
if [ $course == 01005 ] || [ $course == 01006 ]; then
# make a list of where the labels are in pdf position
cd main
$pdflatex main.tex
# Extract "label%position" pairs from the aux file's \newlabel entries.
perl -lne 'print "$1%$2" if /^\\newlabel\{(.*?)\}.*\{(.*)\}\{\}\}$/' main.aux | sort | uniq > ../label2pos
cd ..
cd sections
# First pass: compile every chapter in parallel (8 workers).
ls *tex | parallel -j 8 "$pdflatex {}"
# Map each label to the .tex file that defines it ("label%file").
for f in *tex; do perl -lne 'print "$1%$ARGV" if /\label\{(.*?)\}/' $f; done | sort | uniq > ../label2file
# files which contain references to other files
# these \refs needs to be replaced with \href{./filename#label}
for f in *.log; do perl -lne 'print "$1%$ARGV" if /LaTeX Warning: Reference \`(.*)'\''/' $f; done | sort | uniq > ../refs2update
# For each unresolved reference, rewrite \ref{label} into a cross-pdf \href.
while IFS=% read -r R FILE; do
PDF=$(env lab=$R perl -ne 'print "$1.pdf" if /$ENV{lab}%(.*)\.tex/' ../label2file)
POS=$(env lab=$R perl -ne 'print $1 if /$ENV{lab}%(.*)/' ../label2pos)
# Skip labels that cannot be resolved to both a target pdf and a position.
if [[ -z $PDF ]] || [[ -z $POS ]]; then
continue
fi
SRC="\ref{$R}"
DST="\href{./$PDF#$POS}{${POS#*.}}"
env src="$SRC" env dst="$DST" perl -i -pe 's/\Q$ENV{src}\E/$ENV{dst}/g' ${FILE%.*}.tex
done < ../refs2update
# Escape '#' inside href targets so LaTeX does not treat it as a macro parameter.
for f in *.tex; do cat $f | /usr/local/rvm/rubies/ruby-2.2.4/bin/ruby -0777 -pe '$_.gsub!(/href{[^}]+}/){ |href| href.gsub(%q|#|, %q|\#|) }' | sponge $f; done
# Second pass: recompile with the rewritten cross references.
ls *tex | parallel -j 8 "$pdflatex {}"
# Publish the chapter pdfs.
co="/enotes/$course"
mkdir -p $co
rm -f $co/*
cp *pdf $co/
fi
if [ $course == 02323 ] || [ $course == 02402 ]; then
# R-based courses: build via make.R with a shared per-course cache symlink.
mkdir -p /cache/$course
rm -rf cache
ln -s /cache/$course cache
co="/enotes/$course"
# NOTE(review): $co is written to here before the mkdir -p two lines down —
# if /enotes/<course> does not exist yet the redirect fails; confirm ordering.
R -e "source('make.R')" 2>&1 > $co/log.txt
mkdir -p $co
# Preserve the build log across the wipe of $co.
mv $co/log.txt /tmp/
rm -rf $co/*
mv /tmp/log.txt $co/
cp -r menu/* $co/
fi
......@@ -3,13 +3,20 @@ import yaml from 'js-yaml';
import path from 'path';
import fs from 'fs';
import logger from './logger';
import {zipProject} from './lib/zip_project';
import {ObjectId} from "mongojs";
import initializeDb from './db';
import BuildPDFJob from './jobs/build_pdf_job';
import ZipJob from './jobs/zip_job';
const vol_root = process.env.DTU_ENOTE_VOL || "/data";
const config_root = path.join(vol_root, "config");
const mongoConnectionString = "sharelatex-db.docker.local:27018/getpdf_agenda";
export const a = new Agenda({db: {address: mongoConnectionString}});
const a = new Agenda({defaultConcurrency: 1, db: {address: mongoConnectionString}});
//a.purge(function(err, numRemoved) {});
const sharelatex_config_file = fs.readFileSync(path.join(config_root, "sharelatex.yaml"), {encoding: 'utf8'});
console.log(sharelatex_config_file)
......@@ -17,13 +24,57 @@ const sharelatex_config = yaml.safeLoad(sharelatex_config_file);
console.dir(sharelatex_config);
var tasks = {getpdf: {}, zip:{}};
initializeDb((dbs) => {
const sldb = dbs.sharelatex_db;
for (let course of Object.keys(sharelatex_config)) {
logger.debug(`Creating job definition for getpdf for ${course}`);
const opts = {priority: 'high', concurrency: 10};
continue;
a.define(`GetPDF for ${course}`, opts, function(job, done) {
// User.remove({lastLogIn: { $lt: twoDaysAgo }}, done);
done();
const opts = { priority: 'high', concurrency: 1 };
const project_id = sharelatex_config[course].enote_project_id;
const getpdf_task_name = `GetPDF for ${course}`;
tasks.getpdf[course] = {name: getpdf_task_name};
const after_zip = (zip_details, project, job, done) => {
const buildPDFJob = new BuildPDFJob(course, project, job);
buildPDFJob.jobFunc(job, done);
}
const zipAndGetPDFJob = new ZipJob(sldb, project_id, course, after_zip);
a.define(getpdf_task_name, opts, zipAndGetPDFJob.jobFunc.bind(zipAndGetPDFJob));
const zip_task_name = `Zip for ${course}`;
tasks.zip[course] = {name: zip_task_name};
const zipOnlyJob = new ZipJob(sldb, project_id, course);
a.define(zip_task_name, opts, zipOnlyJob.jobFunc.bind(zipOnlyJob));
}
a.on('ready', function () {
a.start();
});
});
// Shut the agenda scheduler down cleanly (unlock in-flight jobs), then exit 0.
function graceful() {
  a.stop(() => {
    process.exit(0);
  });
}
process.on('SIGTERM', graceful);
process.on('SIGINT', graceful);
// Global agenda lifecycle logging: one log line per job start/finish/failure.
a.on('start', function (job) {
logger.info('Job %s starting', job.attrs.name);
});
a.on('complete', function (job) {
logger.info('Job %s finished', job.attrs.name);
});
a.on('fail', function (job) {
logger.error('Job %s FAILED', job.attrs.name);
});
export {a as agenda, tasks, sharelatex_config};
......@@ -5,6 +5,8 @@ import path from 'path';
import async from 'async';
import fs from 'fs';
import logger from '../logger';
import { agenda, tasks } from '../agenda';
class GetPDFRouter {
constructor({config, dbs, router}) {
......@@ -14,6 +16,14 @@ class GetPDFRouter {
}
listJobs(req, res) {
const job_name = tasks.getpdf[req.params.course_id].name;
agenda.jobs({ name: job_name }, function(err, jobs) {
logger.info(`Got ${jobs.length} jobs for job ${job_name}`);
res.status(200)
.json(jobs);
});
/*
this.dbs.getpdf_db.jobs.find({active: {$eq: true}}, (err, existing) => {
if (err) {
throw err;
......@@ -24,6 +34,7 @@ class GetPDFRouter {
.json({error: "Cannot create a new job; current job exists"});
} else {
const insert = this.dbs.getpdf_db.jobs.insert({active: true}, (err, result) => {
if (err) {
throw err;
......@@ -32,15 +43,20 @@ class GetPDFRouter {
res.status(200)
.json(result);
});
}
});
});*/
}
newJob(req, res) {
console.dir(req.params)
console.dir(req.params);
var job = agenda.now(tasks.getpdf[req.params.course_id].name, {}, (err, j) => {
logger.error(err);
// job.unique
res.status(200)
.send(`${req.params.course_id}`);
.json(job);
});
}
}
......@@ -57,10 +73,64 @@ function getPDFRouter({config, dbs}) {
getpdfRouter.newJob(req, res);
});
// GET /clean — list the course's jobs with the bulky `data` payload stripped.
router.route('/clean')
  .get(function (req, res) {
    const course = tasks.getpdf[req.params.course_id];
    if (!course) {
      // BUG FIX: an unknown course id previously threw a TypeError.
      res.status(404).json({error: `unknown course ${req.params.course_id}`});
      return;
    }
    const job_name = course.name;
    agenda.jobs({ name: job_name }, function (err, jobs) {
      if (err) {
        // BUG FIX: a query error previously crashed on jobs.length.
        res.status(500).json({error: String(err)});
        return;
      }
      logger.info(`Got ${jobs.length} jobs for job ${job_name}`);
      const j = jobs.map((job) => { delete job.attrs.data; return job; });
      res.status(200)
        .json(j);
    });
  });
// GET /purge — keep only the most recently finished job(s) for the course
// and remove the rest from the agenda collection.
router.route('/purge')
  .get(function (req, res) {
    const max_jobs = 1; // number of most-recent jobs to keep
    const course = tasks.getpdf[req.params.course_id];
    if (!course) {
      // BUG FIX: an unknown course id previously threw a TypeError.
      res.status(404).json({error: `unknown course ${req.params.course_id}`});
      return;
    }
    const job_name = course.name;
    agenda.jobs({ name: job_name }, function (err, jobs) {
      if (err) {
        // BUG FIX: a query error previously crashed on jobs.sort.
        res.status(500).json({error: String(err)});
        return;
      }
      console.dir(jobs);
      // Most recently finished first; everything past max_jobs is purged.
      jobs.sort((j1, j2) => (
        (new Date(j2.attrs.lastFinishedAt) - new Date(j1.attrs.lastFinishedAt))
      ));
      logger.info(`Purge: Got ${jobs.length} jobs for job ${job_name}`);
      // Renamed from `tasks`, which shadowed the imported task table.
      const removals = jobs.slice(max_jobs).map((job) => {
        return (cb) => {
          console.dir(job);
          job.remove(function (err) {
            if (!err) console.log('Purge: Successfully removed job from collection');
            cb();
          });
        };
      });
      async.series(removals, () => {
        res.status(200)
          .json({});
      });
    });
  });
// GET /cancel — remove every queued/scheduled job carrying this course's name.
router.route('/cancel')
.get(function (req, res) {
const job_name = tasks.getpdf[req.params.course_id].name;
// NOTE(review): on success `err` is null, so this responds 200 with a `null`
// body — presumably a placeholder. Also verify agenda.cancel's second
// callback argument: it looks like a removal count, not a job list, which
// would explain the commented-out mapping below.
agenda.cancel({ name: job_name }, function(err, jobs) {
// logger.info(`Got ${jobs.length} jobs for job ${job_name}`);
// const j = jobs.map((job) => {delete job.attrs.data; return job} );
res.status(200)
.json(err);
});
});
router.route('/:job_id')
.get(function (req, res) {
const job_id = req.params.job_id;
agenda.jobs({_id: ObjectId(job_id)}, function(err, jobs) {
res.status(200)
.send(`${req.params.course_id} ${req.params.job_id}`);
.json(jobs);
});
});
return router;
......
......@@ -5,112 +5,8 @@ import archiver from 'archiver';
import async from 'async';
import fs from 'fs';
import logger from '../logger';
import { zipProject } from '../lib/zip_project';
//const SL_DATA_PATH = "/var/lib/sharelatex";
const SL_DATA_PATH = "/Users/iainbryson/Projects/DTUQuiz/enote-devel/vol/sharelatex-data/";
// Flatten a project's folder tree into { absolutePath: folderNode } starting
// from rootFolder[0]. Child folders without a name are skipped. The callback
// is invoked synchronously as (null, folders).
function _getAllFoldersFromProject(project, callback) {
  const folders = {};
  const walk = (base, node) => {
    folders[base] = node;
    (node.folders || [])
      .filter((child) => child.name !== undefined)
      .forEach((child) => walk(path.join(base, child.name), child));
  };
  walk("/", project.rootFolder[0]);
  callback(null, folders);
}
// Legacy zip routine (superseded by lib/zip_project's zipProject): streams all
// docs and files of a sharelatex project into SL_DATA_PATH/backups/<name>.zip.
// NOTE(review): the archiver "error" handler only logs — failures never reach
// `callback`; and callback(null, ...) fires right after finalize(), before the
// output stream has flushed, so callers may observe an incomplete zip on disk.
function _zipProject(db, project, callback) {
const archive = archiver("zip");
archive.on("error", (err) => {
console.log("Can't create zip");
console.dir(err);
})
const query = {project_id: ObjectId(project._id)};
db.docs.find(query, (err, docs) => {
// NOTE(review): `err` from the query is ignored; docs.map would then throw.
// Doc bodies keyed by id so tree entries can be matched to their content.
var docContents = new Map(docs.map((doc) => [doc._id.toString(), doc]));
_getAllFoldersFromProject(project, (err, folders) => {
var docArchiveInfo = new Map();
var fileInfo = new Map();
// Walk every folder, collecting editable docs and binary fileRefs.
for (let [folderPath, folder] of Object.entries(folders)) {
for (let doc of (folder.docs || [])) {
const docPath = path.join(folderPath, doc.name);
const content = docContents.get(doc._id.toString());
if (content !== undefined) {
docArchiveInfo.set(docPath, {
_id: doc._id,
name: doc.name,
lines: content.lines,
rev: content.rev
});
} else {
console.log("STRANGE! no content for ")
console.log(`${docPath}: ${content}`);
console.dir(doc._id.toString());
}
}
// Binary files live on disk as <project_id>_<file_id> under user_files.
const filesRootPath = path.join(SL_DATA_PATH, "data", "user_files");
for (let fileRef of (folder.fileRefs || [])) {
const filePath = path.join(folderPath, fileRef.name);
const content = fs.createReadStream(path.join(filesRootPath, `${project._id.toString()}_${fileRef._id.toString()}`));
// NOTE(review): createReadStream never returns undefined; a missing file
// surfaces later as an async "error" event, so this else branch is dead.
if (content !== undefined) {
fileInfo.set(filePath, {
_id: fileRef._id,
name: fileRef.name,
content: content,
});
} else {
console.log("STRANGE! can't find file for fileRef")
console.log(`${filePath}: ${content}`);
console.dir(fileRef._id.toString());
}
}
}
// One async.series task per archive entry; paths are made archive-relative.
var jobs = [];
// NOTE: the loop variable `path` shadows the path module inside these loops.
for (let [path, doc] of docArchiveInfo) {
const relPath = path[0] === "/" ? path.slice(1) : path;
jobs.push( (callback) => {
logger.debug(`Adding doc ${relPath} to archive`);
archive.append(doc.lines.join("\n"), {name:relPath});
callback();
})
}
for (let [path, doc] of fileInfo) {
const relPath = path[0] === "/" ? path.slice(1) : path;
jobs.push( (callback) => {
logger.debug(`Adding file ${relPath} to archive`);
archive.append(doc.content, {name:relPath});
callback();
})
}
const zipPath = path.join(SL_DATA_PATH, "backups", `${project.name}.zip`);
var output = fs.createWriteStream(zipPath);
archive.pipe(output);
logger.info(`Archiving ${jobs.length} docs to archive`);
async.series(jobs, () => {
logger.info(`Finalizing to archive`);
archive.finalize();
callback(null, {
project_id: project._id.toString(),
path: zipPath
})
})
});
})
}
export default ({ config, db }) => resource({
......@@ -123,7 +19,8 @@ export default ({ config, db }) => resource({
load(req, id, callback) {
db.projects.findOne({_id: ObjectId(id)}, (err, project) => {
callback(err, project);
console.dir(project);
// callback(err, project);
});
},
......@@ -133,7 +30,7 @@ export default ({ config, db }) => resource({
var allDetails = [];
var jobs = projects.map( (project) => {
return (callback) => {
_zipProject(db, project, (err, details) => {
zipProject(db, project, (err, details) => {
allDetails.push(details);
callback(details);
})
......@@ -149,7 +46,7 @@ export default ({ config, db }) => resource({
/** GET /:id - Return a given entity */
read({ project }, res) {
_zipProject(db, project, (err, details) => {
zipProject(db, project, (err, details) => {
res.json(details);
})
},
......
import {zipFilePath} from "../lib/zip_project";
const { spawn } = require('child_process');
class BuildPDFJob {
  /**
   * Agenda job that shells out to build-pdf.sh to turn a project zip into
   * course PDFs.
   *
   * @param {string} course_id - Course number (e.g. "01005").
   * @param {Object} project - ShareLaTeX project document; only _id and name
   *   are used (to locate the zip via zipFilePath).
   */
  constructor(course_id, project) {
    this.course_id = course_id;
    this.project = project;
  }

  /**
   * Run the build script, streaming its stdout/stderr into
   * job.attrs.data.logs so progress is visible through the jobs API.
   *
   * @param {Object} job - Agenda job instance (attrs.data.logs is appended to).
   * @param {Function} done - Agenda completion callback; called exactly once.
   */
  jobFunc(job, done) {
    const zipPath = zipFilePath(this.project._id, this.project.name);

    // Some callers may not have primed the data/logs structures.
    job.attrs.data = job.attrs.data || {};
    job.attrs.data.logs = job.attrs.data.logs || [];

    // BUG FIX: pass arguments as an array with no shell, so course ids,
    // paths and the job id cannot be interpreted as shell syntax.
    const child = spawn('./build-pdf.sh',
      [this.course_id, zipPath, job.attrs._id.toString()],
      { cwd: '.' });

    // Guarantee done() fires exactly once across 'error' and 'exit'.
    let finished = false;
    const finish = (failMsg) => {
      if (finished) return;
      finished = true;
      if (failMsg) {
        job.fail(failMsg);
      }
      done();
    };

    const appendLog = (data) => {
      console.dir(data.toString());
      job.attrs.data.logs.push(data.toString());
      job.save();
    };
    child.stdout.on('data', appendLog);
    child.stderr.on('data', appendLog);

    // BUG FIX: fires when the script cannot be spawned at all (e.g. missing
    // file); without this handler the process crashes on an unhandled 'error'.
    child.on('error', (err) => {
      finish(`failed to spawn build-pdf.sh: ${err.message}`);
    });

    child.on('exit', function (code, signal) {
      const msg = 'child process exited with ' +
        `code ${code} and signal ${signal}`;
      console.log(msg);
      finish(code !== 0 ? msg : null);
    });
  }
}
export default BuildPDFJob;
\ No newline at end of file
import {zipProject} from "../lib/zip_project";
import BuildPDFJob from "./build_pdf_job";
import {ObjectId} from "mongojs";
export default class ZipJob {
  /**
   * Agenda job that zips a ShareLaTeX project, optionally chaining into a
   * follow-up step (e.g. a PDF build) once the archive is written.
   *
   * @param {Object} sldb - ShareLaTeX mongo handle (needs .projects).
   * @param {string} project_id - ShareLaTeX project id to archive.
   * @param {string} course - Course number, used in failure messages.
   * @param {Function} [after_zip] - Optional (zip_details, project, job, done)
   *   continuation; when omitted the job completes right after the zip.
   */
  constructor(sldb, project_id, course, after_zip) {
    this.sldb = sldb;
    this.project_id = project_id;
    this.course = course;
    this.after_zip = after_zip;
  }

  /**
   * Agenda job body: load the project, zip it (streaming progress messages
   * into job.attrs.data.logs), then hand off to after_zip or finish.
   *
   * @param {Object} job - Agenda job instance.
   * @param {Function} done - Agenda completion callback.
   */
  jobFunc(job, done) {
    job.attrs.data.logs = ["Loading Project"];
    job.save();
    this.sldb.projects.findOne({_id: ObjectId(this.project_id)}, (err, project) => {
      if (err || !project) {
        job.fail(`Cannot find sharelatex project id ${this.project_id} for course ${this.course}`);
        done();
        return;
      }
      job.attrs.data.logs.push("Loaded Project");
      job.save();
      zipProject(this.sldb, project, (err, details) => {
        if (err) {
          job.fail(`Zip Failed for course ${this.course}`);
          // BUG FIX: done() was missing on this path, so a failed zip left
          // the agenda job locked and running forever.
          done();
          return;
        }
        if (this.after_zip) {
          this.after_zip(details, project, job, done);
        } else {
          done();
        }
      }, (progress_msg) => {
        job.attrs.data.logs.push(progress_msg);
        job.save();
      });
    });
  }
}
import async from "async";
import archiver from "archiver";
import logger from "../logger";
import {ObjectId} from "mongojs";
import path from 'path';
import fs from 'fs';
//const SL_DATA_PATH = "/var/lib/sharelatex";
const SL_DATA_PATH = "/Users/iainbryson/Projects/DTUQuiz/enote-devel/vol/sharelatex-data/";
// Flatten a project's folder tree into { absolutePath: folderNode } starting
// from rootFolder[0]. Child folders without a name are skipped. The callback
// is invoked synchronously as (null, folders).
function _getAllFoldersFromProject(project, callback) {
  const folders = {};
  const walk = (base, node) => {
    folders[base] = node;
    (node.folders || [])
      .filter((child) => child.name !== undefined)
      .forEach((child) => walk(path.join(base, child.name), child));
  };
  walk("/", project.rootFolder[0]);
  callback(null, folders);
}
// Absolute on-disk location of the backup zip for a given project.
export function zipFilePath(project_id, project_name) {
  const zipName = `${project_id}_${project_name}.zip`;
  return path.join(SL_DATA_PATH, "backups", zipName);
}
/**
 * Stream every doc and file of a ShareLaTeX project into a zip archive under
 * SL_DATA_PATH/backups, reporting progress through the optional callback.
 *
 * @param {Object} db - Mongo handle exposing .docs.
 * @param {Object} project - Project document (uses _id, name, rootFolder).
 * @param {Function} zipDone - (err, {project_id, path}); called exactly once.
 * @param {Function} [progress] - Receives human-readable progress strings.
 */
export function zipProject(db, project, zipDone, progress = () => {} ) {
  // zipDone must fire exactly once, whichever error/success path wins.
  let settled = false;
  const finish = (err, details) => {
    if (settled) return;
    settled = true;
    zipDone(err, details === undefined ? null : details);
  };

  const archive = archiver("zip");
  archive.on("error", (err) => {
    console.log("Can't create zip");
    console.dir(err);
    // BUG FIX: archiver errors previously never reached the caller.
    finish({message: "archiver error", details: err}, null);
  });

  const query = {project_id: ObjectId(project._id)};
  progress(`ZIP: Getting project tree for ${project.name}`);
  db.docs.find(query, (err, docs) => {
    if (err) {
      finish({message: "cannot query project", details: err}, null);
      return;
    }
    // Doc bodies keyed by id so tree entries can be matched to their content.
    const docContents = new Map(docs.map((doc) => [doc._id.toString(), doc]));
    _getAllFoldersFromProject(project, (err, folders) => {
      if (err) {
        finish({message: "cannot get folders from project", details: err}, null);
        return;
      }
      const docArchiveInfo = new Map();
      const fileInfo = new Map();
      // Binary files live on disk as <project_id>_<file_id> under user_files.
      const filesRootPath = path.join(SL_DATA_PATH, "data", "user_files");
      for (const [folderPath, folder] of Object.entries(folders)) {
        for (const doc of (folder.docs || [])) {
          const docPath = path.join(folderPath, doc.name);
          const content = docContents.get(doc._id.toString());
          if (content !== undefined) {
            docArchiveInfo.set(docPath, {
              _id: doc._id,
              name: doc.name,
              lines: content.lines,
              rev: content.rev
            });
          } else {
            console.log("STRANGE! no content for ")
            console.log(`${docPath}: ${content}`);
            console.dir(doc._id.toString());
          }
        }
        for (const fileRef of (folder.fileRefs || [])) {
          const filePath = path.join(folderPath, fileRef.name);
          const content = fs.createReadStream(path.join(filesRootPath, `${project._id.toString()}_${fileRef._id.toString()}`));
          if (content !== undefined) {
            fileInfo.set(filePath, {
              _id: fileRef._id,
              name: fileRef.name,
              content: content,
            });
          } else {
            console.log("STRANGE! can't find file for fileRef")
            console.log(`${filePath}: ${content}`);
            console.dir(fileRef._id.toString());
          }
        }
      }

      // One async.series task per archive entry; paths made archive-relative.
      // Loop variable renamed from `path`, which shadowed the path module.
      const jobs = [];
      for (const [entryPath, doc] of docArchiveInfo) {
        const relPath = entryPath[0] === "/" ? entryPath.slice(1) : entryPath;
        jobs.push((callback) => {
          const msg = `ZIP: Adding doc ${relPath} to archive`;
          logger.debug(msg);
          progress(msg);
          archive.append(doc.lines.join("\n"), {name:relPath});
          callback();
        });
      }
      for (const [entryPath, doc] of fileInfo) {
        const relPath = entryPath[0] === "/" ? entryPath.slice(1) : entryPath;
        jobs.push((callback) => {
          const msg = `ZIP: Adding file ${relPath} to archive`;
          logger.debug(msg);
          progress(msg);
          archive.append(doc.content, {name:relPath});
          callback();
        });
      }

      const zipPath = zipFilePath(project._id, project.name);
      const output = fs.createWriteStream(zipPath);
      // BUG FIX: report success only once the zip is fully flushed to disk;
      // previously zipDone fired right after finalize(), racing the write.
      output.on("close", () => {
        finish(null, {
          project_id: project._id.toString(),
          path: zipPath
        });
      });
      output.on("error", (err) => {
        finish({message: "cannot write zip", details: err}, null);
      });
      archive.pipe(output);

      const msg = `ZIP: Archiving ${jobs.length} docs to archive`;
      logger.info(msg);
      progress(msg);
      async.series(jobs, () => {
        const doneMsg = `ZIP: Finalizing to archive`;
        logger.debug(doneMsg);
        progress(doneMsg);
        archive.finalize();
      });
    });
  });
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment