From 1ad9418a6f2264544d3b25ca0a2546e5421f4d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=BCller?= Date: Sat, 14 Sep 2024 15:54:30 +0200 Subject: [PATCH] Add code for revision cleanup (#6442) * Add initial code for revision cleanup * Some improvements - code cleanup * Cleanup logging * Add button in admin backend to cleanup revisions of a specific pad * Disable cleanup by default and show errors in admin area * Improve cleanup code * Load revisions for cleanup in parallel * Consider saved revisions during pad cleanup --- admin/public/ep_admin_pads/de.json | 1 + admin/public/ep_admin_pads/en.json | 1 + admin/src/pages/PadPage.tsx | 44 ++++++- settings.json.docker | 8 ++ settings.json.template | 8 ++ src/node/handler/PadMessageHandler.ts | 4 +- src/node/hooks/express/adminsettings.ts | 35 +++++ src/node/types/Revision.ts | 9 ++ src/node/utils/Cleanup.ts | 168 ++++++++++++++++++++++++ src/node/utils/Settings.ts | 8 ++ 10 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 src/node/types/Revision.ts create mode 100644 src/node/utils/Cleanup.ts diff --git a/admin/public/ep_admin_pads/de.json b/admin/public/ep_admin_pads/de.json index afb553caf46..67dd73ddf07 100644 --- a/admin/public/ep_admin_pads/de.json +++ b/admin/public/ep_admin_pads/de.json @@ -14,6 +14,7 @@ "ep_adminpads2_autoupdate.title": "Aktiviert oder deaktiviert automatische Aktualisierungen für die aktuelle Abfrage.", "ep_adminpads2_confirm": "Willst du das Pad {{padID}} wirklich löschen?", "ep_adminpads2_delete.value": "Löschen", + "ep_adminpads2_cleanup": "Historie aufräumen", "ep_adminpads2_last-edited": "Zuletzt bearbeitet", "ep_adminpads2_loading": "Lädt...", "ep_adminpads2_manage-pads": "Pads verwalten", diff --git a/admin/public/ep_admin_pads/en.json b/admin/public/ep_admin_pads/en.json index 8a9044b1b84..76354c6403f 100644 --- a/admin/public/ep_admin_pads/en.json +++ b/admin/public/ep_admin_pads/en.json @@ -4,6 +4,7 @@ "ep_adminpads2_autoupdate.title": "Enables or disables 
automatic updates for the current query.", "ep_adminpads2_confirm": "Do you really want to delete the pad {{padID}}?", "ep_adminpads2_delete.value": "Delete", + "ep_adminpads2_cleanup": "Cleanup revisions", "ep_adminpads2_last-edited": "Last edited", "ep_adminpads2_loading": "Loading…", "ep_adminpads2_manage-pads": "Manage pads", diff --git a/admin/src/pages/PadPage.tsx b/admin/src/pages/PadPage.tsx index e663603cdd9..b5db854f567 100644 --- a/admin/src/pages/PadPage.tsx +++ b/admin/src/pages/PadPage.tsx @@ -6,7 +6,7 @@ import {useDebounce} from "../utils/useDebounce.ts"; import {determineSorting} from "../utils/sorting.ts"; import * as Dialog from "@radix-ui/react-dialog"; import {IconButton} from "../components/IconButton.tsx"; -import {ChevronLeft, ChevronRight, Eye, Trash2} from "lucide-react"; +import {ChevronLeft, ChevronRight, Eye, Trash2, FileStack} from "lucide-react"; import {SearchField} from "../components/SearchField.tsx"; export const PadPage = ()=>{ @@ -23,6 +23,7 @@ export const PadPage = ()=>{ const pads = useStore(state=>state.pads) const [currentPage, setCurrentPage] = useState(0) const [deleteDialog, setDeleteDialog] = useState(false) + const [errorText, setErrorText] = useState(null) const [padToDelete, setPadToDelete] = useState('') const pages = useMemo(()=>{ if(!pads){ @@ -68,12 +69,35 @@ export const PadPage = ()=>{ results: newPads }) }) + + settingsSocket.on('results:cleanupPadRevisions', (data)=>{ + let newPads = useStore.getState().pads?.results ?? [] + + if (data.error) { + setErrorText(data.error) + return + } + + newPads.forEach((pad)=>{ + if (pad.padName === data.padId) { + pad.revisionNumber = data.keepRevisions + } + }) + + useStore.getState().setPads({ + results: newPads, + total: useStore.getState().pads!.total + }) + }) }, [settingsSocket, pads]); const deletePad = (padID: string)=>{ settingsSocket?.emit('deletePad', padID) } + const cleanupPad = (padID: string)=>{ + settingsSocket?.emit('cleanupPadRevisions', padID) + } return
@@ -100,6 +124,21 @@ export const PadPage = ()=>{ + + + + +
+
Error occurred: {errorText}
+
+ +
+
+
+
+

setSearchTerm(v.target.value)} placeholder={t('ep_admin_pads:ep_adminpads2_search-heading')}/> @@ -150,6 +189,9 @@ export const PadPage = ()=>{ setPadToDelete(pad.padName) setDeleteDialog(true) }}/> + } title={} onClick={()=>{ + cleanupPad(pad.padName) + }}/> } title="view" onClick={()=>window.open(`/p/${pad.padName}`, '_blank')}/> diff --git a/settings.json.docker b/settings.json.docker index 109f36bfd6c..da1d51c13a8 100644 --- a/settings.json.docker +++ b/settings.json.docker @@ -171,6 +171,14 @@ */ "showSettingsInAdminPage": "${SHOW_SETTINGS_IN_ADMIN_PAGE:true}", + /* + * Settings for cleanup of pads + */ + "cleanup": { + "enabled": false, + "keepRevisions": 5 + }, + /* The authentication method used by the server. The default value is sso diff --git a/settings.json.template b/settings.json.template index fe10e51b9cc..2d856f42e4c 100644 --- a/settings.json.template +++ b/settings.json.template @@ -162,6 +162,14 @@ */ "showSettingsInAdminPage": true, + /* + * Settings for cleanup of pads + */ + "cleanup": { + "enabled": false, + "keepRevisions": 5 + }, + /* * Node native SSL support * diff --git a/src/node/handler/PadMessageHandler.ts b/src/node/handler/PadMessageHandler.ts index 6286f282eec..9f1c9e86bc0 100644 --- a/src/node/handler/PadMessageHandler.ts +++ b/src/node/handler/PadMessageHandler.ts @@ -1147,7 +1147,7 @@ const getChangesetInfo = async (pad: PadType, startNum: number, endNum:number, g getPadLines(pad, startNum - 1), // Get all needed composite Changesets. ...compositesChangesetNeeded.map(async (item) => { - const changeset = await composePadChangesets(pad, item.start, item.end); + const changeset = await exports.composePadChangesets(pad, item.start, item.end); composedChangesets[`${item.start}/${item.end}`] = changeset; }), // Get all needed revision Dates. 
@@ -1213,7 +1213,7 @@ const getPadLines = async (pad: PadType, revNum: number) => {
  * Tries to rebuild the composePadChangeset function of the original Etherpad
  * https://github.com/ether/pad/blob/master/etherpad/src/etherpad/control/pad/pad_changeset_control.js#L241
  */
-const composePadChangesets = async (pad: PadType, startNum: number, endNum: number) => {
+exports.composePadChangesets = async (pad: PadType, startNum: number, endNum: number) => {
   // fetch all changesets we need
   const headNum = pad.getHeadRevisionNumber();
   endNum = Math.min(endNum, headNum + 1);
diff --git a/src/node/hooks/express/adminsettings.ts b/src/node/hooks/express/adminsettings.ts
index 63d901f2126..4c60a05ad20 100644
--- a/src/node/hooks/express/adminsettings.ts
+++ b/src/node/hooks/express/adminsettings.ts
@@ -13,6 +13,7 @@ const settings = require('../../utils/Settings');
 const UpdateCheck = require('../../utils/UpdateCheck');
 const padManager = require('../../db/PadManager');
 const api = require('../../db/API');
+const cleanup = require('../../utils/Cleanup');
 
 const queryPadLimit = 12;
 
@@ -252,6 +253,40 @@ exports.socketio = (hookName: string, {io}: any) => {
       }
     })
 
+    // Squashes the revision history of one pad on request of the admin UI.
+    socket.on('cleanupPadRevisions', async (padId: string) => {
+      if (!settings.cleanup.enabled) {
+        socket.emit('results:cleanupPadRevisions', {
+          error: 'Cleanup disabled. Enable cleanup in settings.json: cleanup.enabled => true',
+        });
+        return;
+      }
+      // Report a missing pad; without a reply the admin UI gets no feedback.
+      if (!await padManager.doesPadExists(padId)) {
+        socket.emit('results:cleanupPadRevisions', {error: `Pad does not exist: ${padId}`});
+        return;
+      }
+      logger.info(`Cleanup pad revisions: ${padId}`);
+      try {
+        const result = await cleanup.deleteRevisions(padId, settings.cleanup.keepRevisions)
+        if (result) {
+          socket.emit('results:cleanupPadRevisions', {
+            padId: padId,
+            keepRevisions: settings.cleanup.keepRevisions,
+          });
+          logger.info('successful cleaned up pad: ', padId)
+        } else {
+          // false: pad.head <= keepRevisions, so deleteRevisions changed nothing
+          socket.emit('results:cleanupPadRevisions', {
+            error: 'Error cleaning up pad',
+          });
+        }
+      } catch (err: any) {
+        logger.error(`Error in pad ${padId}: ${err.stack || err}`);
+        socket.emit('results:cleanupPadRevisions', {error: err.toString()});
+      }
+    })
+
     socket.on('restartServer', async () => {
       logger.info('Admin request to restart server through a socket on /admin/settings');
       settings.reloadSettings();
diff --git a/src/node/types/Revision.ts b/src/node/types/Revision.ts
new file mode 100644
index 00000000000..8a9d65e29cf
--- /dev/null
+++ b/src/node/types/Revision.ts
@@ -0,0 +1,9 @@
+import {AChangeSet} from "./PadType";
+
+export type Revision = {
+  changeset: AChangeSet,
+  meta: {
+    author: string,
+    timestamp: number,
+  }
+}
diff --git a/src/node/utils/Cleanup.ts b/src/node/utils/Cleanup.ts
new file mode 100644
index 00000000000..7e480020dda
--- /dev/null
+++ b/src/node/utils/Cleanup.ts
@@ -0,0 +1,168 @@
+'use strict'
+
+import {AChangeSet} from "../types/PadType";
+import {Revision} from "../types/Revision";
+
+const promises = require('./promises');
+const padManager = require('ep_etherpad-lite/node/db/PadManager');
+const db = require('ep_etherpad-lite/node/db/DB');
+const Changeset = require('ep_etherpad-lite/static/js/Changeset');
+const padMessageHandler = require('ep_etherpad-lite/node/handler/PadMessageHandler');
+const log4js = require('log4js');
+const logger = log4js.getLogger('cleanup');
+
+/**
+ * Removes the whole history of a pad: copies it via copyPadWithoutHistory() to
+ * a randomly named temporary pad and back onto `padID`, then removes the copy.
+ */
+exports.deleteAllRevisions = async (padID: string): Promise<void> => {
+  const randomPadId = padID + 'aertdfdf' + Math.random().toString(10)
+  let pad = await padManager.getPad(padID);
+  await pad.copyPadWithoutHistory(randomPadId, false);
+  pad = await padManager.getPad(randomPadId);
+  await pad.copyPadWithoutHistory(padID, true);
+  await pad.remove();
+}
+
+/**
+ * Builds the record stored under `pad:{padId}:revs:{n}`. Key revisions also
+ * carry the attribute pool and the full text (`atext`) as of that revision.
+ */
+const createRevision = async (aChangeset: AChangeSet, timestamp: number, isKeyRev: boolean, authorId: string, atext: any, pool: any) => {
+  if (authorId !== '') pool.putAttrib(['author', authorId]);
+  return {
+    changeset: aChangeset,
+    meta: {
+      author: authorId,
+      timestamp: timestamp,
+      ...isKeyRev ? {
+        pool: pool,
+        atext: atext,
+      } : {},
+    },
+  };
+}
+
+/**
+ * Squashes all but the newest `keepRevisions` revisions of a pad into a new
+ * revision 0 and renumbers the kept revisions to 1..keepRevisions.
+ * @param padId pad to clean up; kickSessionsFromPad() is called for it
+ * @param keepRevisions number of newest revisions to keep (integer >= 0)
+ * @returns false if `pad.head <= keepRevisions` (nothing is changed), else true
+ * @throws RangeError if `keepRevisions` is not a non-negative integer
+ */
+exports.deleteRevisions = async (padId: string, keepRevisions: number): Promise<boolean> => {
+  // A negative, fractional or string value (e.g. from settings.json) would
+  // break the revision arithmetic below, so reject it before touching data.
+  if (!Number.isInteger(keepRevisions) || keepRevisions < 0) {
+    throw new RangeError(`keepRevisions must be a non-negative integer: ${keepRevisions}`);
+  }
+  logger.debug('Start cleanup revisions', padId)
+  let pad = await padManager.getPad(padId);
+  await pad.check()
+  logger.debug('Initial pad is valid')
+
+  if (pad.head <= keepRevisions) {
+    logger.debug('Pad has not enough revisions')
+    return false
+  }
+
+  padMessageHandler.kickSessionsFromPad(padId)
+
+  const cleanupUntilRevision = pad.head - keepRevisions
+  logger.debug('Composing changesets: ', cleanupUntilRevision)
+  const changeset = await padMessageHandler.composePadChangesets(pad, 0, cleanupUntilRevision + 1)
+
+  // Load revisions cleanupUntilRevision..head; lower indexes stay unset.
+  const revisions: Revision[] = [];
+  await promises.timesLimit(keepRevisions + 1, 500, async (i: number) => {
+    const rev = i + cleanupUntilRevision
+    revisions[rev] = await pad.getRevision(rev)
+  });
+  logger.debug('Loaded revisions: ', revisions.length)
+
+  // Build all new records in memory, strictly in order (each text builds on
+  // the previous one), so a broken revision fails before anything is deleted.
+  const pool = pad.apool()
+  let newAText = Changeset.applyToAText(changeset, Changeset.makeAText('\n'), pool);
+  const newRevisions = [await createRevision(
+    changeset,
+    revisions[cleanupUntilRevision].meta.timestamp,
+    0 === pad.getKeyRevisionNumber(0),
+    '',
+    newAText,
+    pool
+  )];
+  for (let newRev = 1; newRev <= keepRevisions; newRev++) {
+    const oldRevision = revisions[newRev + cleanupUntilRevision]
+    newAText = Changeset.applyToAText(oldRevision.changeset, newAText, pool);
+    newRevisions.push(await createRevision(
+      oldRevision.changeset,
+      oldRevision.meta.timestamp,
+      newRev === pad.getKeyRevisionNumber(newRev),
+      oldRevision.meta.author,
+      newAText,
+      pool
+    ));
+  }
+
+  await promises.timesLimit(pad.head + 1, 500, async (i: number) => {
+    await db.remove(`pad:${padId}:revs:${i}`, null);
+  });
+
+  let padContent = await db.get(`pad:${padId}`)
+  padContent.head = keepRevisions
+  if (padContent.savedRevisions) {
+    // Saved revisions inside the squashed range are dropped, the rest renumbered.
+    let newSavedRevisions = []
+    for (let i = 0; i < padContent.savedRevisions.length; i++) {
+      if (padContent.savedRevisions[i].revNum > cleanupUntilRevision) {
+        padContent.savedRevisions[i].revNum = padContent.savedRevisions[i].revNum - cleanupUntilRevision
+        newSavedRevisions.push(padContent.savedRevisions[i])
+      }
+    }
+    padContent.savedRevisions = newSavedRevisions
+  }
+  await db.set(`pad:${padId}`, padContent);
+
+  await promises.timesLimit(newRevisions.length, 500, async (i: number) => {
+    await db.set(`pad:${padId}:revs:${i}`, newRevisions[i]);
+  });
+  logger.debug('Finished migration. Checking pad now')
+
+  padManager.unloadPad(padId);
+  let newPad = await padManager.getPad(padId);
+  await newPad.check();
+  return true
+}
+
+/**
+ * Waits 5 seconds, then runs deleteRevisions() on every pad whose head is at least
+ * `minHead`, whose padUsersCount() is 0 and whose head revision is `minAge` old.
+ */
+exports.checkTodos = async () => {
+  await new Promise(resolve => setTimeout(resolve, 5000));
+  // TODO: Move to settings
+  const settings = {
+    minHead: 100,
+    keepRevisions: 100,
+    minAge: 1,//1000 * 60 * 60 * 24,
+  }
+  await Promise.all((await padManager.listAllPads()).padIDs.map(async (padId: string) => {
+    // TODO: Handle concurrency
+    const pad = await padManager.getPad(padId);
+    const revisionDate = await pad.getRevisionDate(pad.getHeadRevisionNumber())
+    if (pad.head < settings.minHead || padMessageHandler.padUsersCount(padId) > 0 || Date.now() < revisionDate + settings.minAge) {
+      return
+    }
+    try {
+      const result = await exports.deleteRevisions(padId, settings.keepRevisions)
+      if (result) {
+        logger.info('successful cleaned up pad: ', padId)
+      }
+    } catch (err: any) {
+      logger.error(`Error in pad ${padId}: ${err.stack || err}`);
+      return;
+    }
+  }));
+}
diff --git a/src/node/utils/Settings.ts b/src/node/utils/Settings.ts
index 4ff117ad367..4d7b421e1c8 100644
--- a/src/node/utils/Settings.ts
+++ b/src/node/utils/Settings.ts
@@ -380,6 +380,14 @@ exports.sso = {
  */
 exports.showSettingsInAdminPage = true;
 
+/*
+ * Settings for cleanup of pads
+ */
+exports.cleanup = {
+  enabled: false,
+  keepRevisions: 100,
+}
+
 /*
  * By default, when caret is moved out of viewport, it scrolls the minimum
  * height needed to make this line visible.