/*
 * From: https://github.com/not-implemented/hocr-proofreader
 */

'use strict';

// --- Configuration ---
window.DINGS_USE_PROGRESSIVE = true;
// Default prefix if not set elsewhere
window.DINGS_HOCR_PREFIX = window.DINGS_HOCR_PREFIX || "300040017";

// --- Helpers ---

/**
 * Builds the hOCR file name for one page from the configured prefix.
 *
 * @param {number|string} pageNumber - Page number inserted after ".page_".
 * @param {string} [suffix=''] - Optional text inserted before the ".hocr" extension.
 * @returns {string} "<prefix>.page_<pageNumber><suffix>.hocr"
 */
function buildHocrFileName(pageNumber, suffix = '') {
    const prefix = window.DINGS_HOCR_PREFIX;
    return `${prefix}.page_${pageNumber}${suffix}.hocr`;
}

/** Small DOM / XHR helpers shared by the proofreader. */
var Util = {
    /** Runs `callback` immediately if the document has finished loading, otherwise on DOMContentLoaded. */
    onReady: function (callback) {
        if (document.readyState !== 'loading') {
            callback();
        } else {
            document.addEventListener('DOMContentLoaded', callback);
        }
    },

    /**
     * Performs a GET request and reports the result node-style.
     *
     * @param {string} url - URL to fetch.
     * @param {function(?Error, string=)} callback - Called with (null, responseText) for
     *     status codes 200-399, otherwise with an Error as the only argument.
     */
    get: function (url, callback) {
        var request = new XMLHttpRequest();
        request.open('GET', url);

        request.onload = function () {
            if (request.status >= 200 && request.status < 400) {
                callback(null, request.responseText);
            } else {
                callback(new Error('HTTP Error: ' + request.status));
            }
        };
        request.onerror = function () {
            callback(new Error('Connection error'));
        };

        request.send();
    },

    /** Creates an HTML element and sets every enumerable key of `attrs` as an attribute. */
    createElem: function (name, attrs) {
        var node = document.createElement(name);
        for (var n in attrs) node.setAttribute(n, attrs[n]);
        return node;
    },

    /** Same as createElem, but creates the element in the SVG namespace. */
    createSvgElem: function (name, attrs) {
        var node = document.createElementNS('http://www.w3.org/2000/svg', name);
        for (var n in attrs) node.setAttribute(n, attrs[n]);
        return node;
    },

    /** Removes all child nodes of `node`. */
    removeChildren: function (node) {
        while (node.hasChildNodes()) node.removeChild(node.lastChild);
    }
};

/**
 * Side-by-side hOCR proofreader: an SVG layout view of the current page next to an
 * editable iframe holding the hOCR document.
 *
 * @param {{layoutContainer: string, editorContainer: string}} config - Element ids of
 *     the two containers the proofreader renders into.
 * @throws {Error} If either container element cannot be found.
 */
function HocrProofreader(config) {
    this.config = config;

    this.layoutContainer = document.getElementById(config.layoutContainer);
    this.editorContainer = document.getElementById(config.editorContainer);

    // Fail early with a clear message instead of a TypeError halfway through DOM setup.
    if (!this.layoutContainer || !this.editorContainer) {
        throw new Error('HocrProofreader: layout or editor container element not found');
    }

    // --- CLEANUP GUARD: Prevents visual duplication ---
    this.layoutContainer.innerHTML = '';
    var existing = this.editorContainer.querySelector('iframe.editor');
    if (existing) existing.remove();

    this.layoutSvg = Util.createSvgElem('svg', {'class': 'layout'});

    this.layoutBackground = Util.createSvgElem('rect', {
        'class': 'background', 'x': 0, 'y': 0, 'width': '100%', 'height': '100%', 'style': 'fill: none'
    });
    this.layoutSvg.appendChild(this.layoutBackground);

    this.layoutImage = Util.createSvgElem('image', {'x': 0, 'y': 0, 'width': '100%', 'height': '100%'});
    this.layoutSvg.appendChild(this.layoutImage);

    this.layoutWords = Util.createSvgElem('g', {'class': 'words'});
    this.layoutSvg.appendChild(this.layoutWords);

    this.layoutRects = Util.createSvgElem('g', {'class': 'rects'});
    this.layoutSvg.appendChild(this.layoutRects);

    this.layoutContainer.appendChild(this.layoutSvg);
    this.layoutContainer.style.overflow = 'scroll';

    this.editorIframe = Util.createElem('iframe', {'class': 'editor', 'frameborder': 0});
    this.editorContainer.appendChild(this.editorIframe);

    var self = this;
    self.hoveredNode = null;
    self.mousePosition = null;

    this.layoutSvg.addEventListener('mousemove', function (event) {
        self.mousePosition = {container: 'layout', x: event.clientX, y: event.clientY};
        self.onHover(event.target);
    });
    this.layoutSvg.addEventListener('mouseleave', function (event) {
        self.mousePosition = null;
        self.onHover(null);
    });
    // While scrolling, the element under the (stationary) pointer changes without a mousemove.
    this.layoutContainer.addEventListener('scroll', function (event) {
        if (!self.mousePosition || self.mousePosition.container !== 'layout') return;
        self.onHover(document.elementFromPoint(self.mousePosition.x, self.mousePosition.y));
    });

    this.currentPage = null;
    this.toggleLayoutImage();
}

/**
 * Loads an hOCR document into the editor iframe, wires up hover tracking and shows
 * the first page.
 *
 * @param {string} hocr - hOCR markup written into the iframe document.
 * @param {string} baseUrl - Prefix prepended to each page's `image` option.
 */
HocrProofreader.prototype.setHocr = function (hocr, baseUrl) {
    this.hocrBaseUrl = baseUrl;

    var hocrDoc = this.editorIframe.contentDocument;
    hocrDoc.open();
    hocrDoc.write(hocr);
    hocrDoc.close();

    var self = this;
    var hocrRoot = hocrDoc.documentElement;

    hocrRoot.addEventListener('mousemove', function (event) {
        self.mousePosition = {container: 'editor', x: event.clientX, y: event.clientY};
        self.onHover(event.target, true);
    });
    hocrRoot.addEventListener('mouseleave', function (event) {
        self.mousePosition = null;
        self.onHover(null, true);
    });
    hocrDoc.addEventListener('scroll', function (event) {
        if (!self.mousePosition || self.mousePosition.container !== 'editor') return;
        self.onHover(hocrDoc.elementFromPoint(self.mousePosition.x, self.mousePosition.y), true);
    });

    // NOTE(review): the stylesheet name is hard-coded; it matches the default
    // DINGS_HOCR_PREFIX but does not follow a customized prefix - confirm intent.
    this.editorStylesheet = Util.createElem('link', {'type': 'text/css', 'rel': 'stylesheet', 'href': '300040017.css'});
    hocrDoc.head.appendChild(this.editorStylesheet);

    hocrDoc.body.contentEditable = true;

    this.setPage('first');
};

/**
 * Serializes the edited hOCR document. The editor-only stylesheet, the contentEditable
 * flag and hover classes are taken out before serializing; stylesheet and
 * contentEditable are restored afterwards.
 *
 * @returns {string} The document serialized with XMLSerializer.
 */
HocrProofreader.prototype.getHocr = function () {
    var hocrDoc = this.editorIframe.contentDocument;

    hocrDoc.head.removeChild(this.editorStylesheet);
    hocrDoc.body.contentEditable = 'inherit';
    this.onHover(null);

    var serializer = new XMLSerializer();
    var hocr = serializer.serializeToString(hocrDoc);

    hocrDoc.head.appendChild(this.editorStylesheet);
    hocrDoc.body.contentEditable = true;

    return hocr;
};

/**
 * Applies a zoom mode. Without an argument the previously selected mode is re-applied.
 *
 * @param {string} [zoom] - 'page-full' toggles fullscreen of "layout-container",
 *     'page-width' toggles fullscreen of the editor iframe document, 'original' sizes
 *     the SVG to the current page's bbox. Other values are stored but have no effect.
 */
HocrProofreader.prototype.setZoom = function (zoom) {
    if (zoom) this.currentZoom = zoom;

    if (this.currentZoom === 'page-full') {
        this.toggleFullscreen("layout-container");
    } else if (this.currentZoom === 'page-width') {
        this.toggleIframeFullscreen("editor-container");
    } else if (this.currentZoom === 'original') {
        // A page without a bbox is treated like "no page": clear the explicit size.
        const bbox = this.currentPage ? this.getNodeOptions(this.currentPage).bbox : null;
        if (bbox) {
            this.layoutSvg.style.width = `${bbox[2] - bbox[0]}px`;
            this.layoutSvg.style.height = `${bbox[3] - bbox[1]}px`;
        } else {
            this.layoutSvg.style.width = null;
            this.layoutSvg.style.height = null;
        }
        this.layoutSvg.style.maxWidth = null;
        this.layoutSvg.style.maxHeight = null;
    }
};

/**
 * Enters fullscreen on `elem` if nothing is fullscreen, otherwise leaves fullscreen,
 * and keeps the arrow-key page navigation listener in step.
 *
 * The bound handler is created once per proofreader and reused: removeEventListener
 * only removes a listener when it receives the same function object that was added,
 * so binding anew on every call would leak listeners.
 */
function toggleFullscreenForElement(proofreader, elem) {
    if (!proofreader.keyNavigationHandler) {
        proofreader.keyNavigationHandler = proofreader.handleKeyNavigation.bind(proofreader);
    }

    if (!document.fullscreenElement) {
        if (elem.requestFullscreen) elem.requestFullscreen();
        else if (elem.webkitRequestFullscreen) elem.webkitRequestFullscreen();
        else if (elem.msRequestFullscreen) elem.msRequestFullscreen();

        document.addEventListener("keydown", proofreader.keyNavigationHandler);
    } else {
        if (document.exitFullscreen) document.exitFullscreen();
        else if (document.webkitExitFullscreen) document.webkitExitFullscreen();
        else if (document.msExitFullscreen) document.msExitFullscreen();

        document.removeEventListener("keydown", proofreader.keyNavigationHandler);
    }
}

/**
 * Toggles fullscreen for the element with the given id; does nothing if it is missing.
 *
 * @param {string} elementId - Id of the element to show fullscreen.
 */
HocrProofreader.prototype.toggleFullscreen = function (elementId) {
    let container = document.getElementById(elementId);
    if (!container) return;

    toggleFullscreenForElement(this, container);
};

/**
 * Keydown handler used while in fullscreen: ArrowRight / ArrowLeft switch to the
 * next / previous page. Ignored when no element is fullscreen.
 */
HocrProofreader.prototype.handleKeyNavigation = function (event) {
    if (!document.fullscreenElement) return;

    if (event.key === "ArrowRight") this.setPage("next");
    else if (event.key === "ArrowLeft") this.setPage("previous");
};

/**
 * Toggles fullscreen for the root element of the editor iframe's document.
 *
 * @param {string} elementId - Id of the editor container; only checked for existence.
 */
HocrProofreader.prototype.toggleIframeFullscreen = function (elementId) {
    let container = document.getElementById(elementId);
    let iframe = this.editorIframe;
    if (!container || !iframe) return;

    let iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
    if (!iframeDoc) return;

    toggleFullscreenForElement(this, iframeDoc.documentElement);
};

/** Switches the layout view between the scanned image and the rendered OCR words. */
HocrProofreader.prototype.toggleLayoutImage = function () {
    // An empty display value counts as "words visible", so the first call shows the image.
    if (!this.layoutWords.style.display || this.layoutWords.style.display === 'block') {
        this.layoutWords.style.display = 'none';
        this.layoutImage.style.display = 'block';
    } else {
        this.layoutWords.style.display = 'block';
        this.layoutImage.style.display = 'none';
    }
};

/**
 * Selects a page among the element children of the hOCR body and renders it.
 *
 * @param {string} page - 'first', 'last', 'next' or 'previous'. Any other value, or
 *     running past either end, renders no page.
 */
HocrProofreader.prototype.setPage = function (page) {
    var pageNode, backwards = false, skipCurrent = false;
    var hocrDoc = this.editorIframe.contentDocument;

    if (page === 'first') {
        pageNode = hocrDoc.body.firstElementChild;
    } else if (page === 'last') {
        pageNode = hocrDoc.body.lastElementChild;
        backwards = true;
    } else if (page === 'next') {
        pageNode = this.currentPage || hocrDoc.body.firstElementChild;
        skipCurrent = true;
    } else if (page === 'previous') {
        pageNode = this.currentPage || hocrDoc.body.lastElementChild;
        backwards = true;
        skipCurrent = true;
    }

    // Walk siblings in the chosen direction until an .ocr_page is found; for
    // next/previous the starting node itself is stepped over once.
    while (pageNode && (skipCurrent || !pageNode.classList.contains('ocr_page'))) {
        pageNode = backwards ? pageNode.previousElementSibling : pageNode.nextElementSibling;
        skipCurrent = false;
    }

    this.renderPage(pageNode || null);
};

/**
 * Makes `pageNode` the current page and rebuilds the SVG layout (viewBox, image,
 * words and rectangles) for it.
 *
 * @param {?Element} pageNode - The .ocr_page element to show, or null to clear the layout.
 */
HocrProofreader.prototype.renderPage = function (pageNode) {
    var XLINK_NS = 'http://www.w3.org/1999/xlink';

    this.layoutContainer.scrollTop = 0;
    this.layoutContainer.scrollLeft = 0;

    // When moving to a page that precedes the current one, start at its bottom.
    var scrollToBottom = false, tmpNode = this.currentPage;
    while (tmpNode) {
        tmpNode = tmpNode.previousElementSibling;
        if (tmpNode === pageNode) {
            scrollToBottom = true;
            break;
        }
    }

    // Drop the editor -> SVG links of the page being replaced.
    function removeLinkedNodes(node) {
        if (node.linkedNode) node.linkedNode = null;

        var childNode = node.firstElementChild;
        while (childNode) {
            removeLinkedNodes(childNode);
            childNode = childNode.nextElementSibling;
        }
    }
    if (this.currentPage) removeLinkedNodes(this.currentPage);

    Util.removeChildren(this.layoutWords);
    Util.removeChildren(this.layoutRects);

    this.currentPage = pageNode;
    this.layoutImage.removeAttribute('transform');

    if (!this.currentPage) return;

    var pageOptions = this.getNodeOptions(this.currentPage);

    this.layoutSvg.setAttribute('viewBox', pageOptions.bbox.join(' '));
    this.layoutWords.style.fontFamily = 'serif';

    // Avoid requesting "undefined..." URLs when the base URL or the page image is missing.
    if (pageOptions.image) {
        this.layoutImage.setAttributeNS(XLINK_NS, 'href', (this.hocrBaseUrl || '') + pageOptions.image);
    } else {
        this.layoutImage.removeAttributeNS(XLINK_NS, 'href');
    }

    if (pageOptions.textangle) {
        // Rotate the image around the centre of the page box.
        this.layoutImage.setAttribute('transform', 'rotate(' + pageOptions.textangle + ' ' +
            ((pageOptions.bbox[2] - pageOptions.bbox[0]) / 2) + ' ' +
            ((pageOptions.bbox[3] - pageOptions.bbox[1]) / 2) + ')');
    }

    this.renderNodesRecursive(this.currentPage, pageOptions);

    if (scrollToBottom) {
        this.layoutContainer.scrollTop = this.layoutContainer.scrollHeight - this.layoutContainer.clientHeight;
    }
};
/**
 * Renders `node` and its descendants into the SVG layout: one <rect> per hOCR element
 * that has a bbox (grouped in nested <g> elements per area/paragraph/line) and one
 * <text> per word that has baseline information. Each rect and its hOCR element are
 * cross-linked through their `linkedNode` properties.
 *
 * @param {Element} node - hOCR element to render.
 * @param {Object} options - Options of the parent element, used for inheritance.
 * @param {Element} [parentRectsNode] - SVG group receiving the rects; defaults to this.layoutRects.
 */
HocrProofreader.prototype.renderNodesRecursive = function (node, options, parentRectsNode) {
    if (!parentRectsNode) parentRectsNode = this.layoutRects;

    var className = null;
    if (node.classList.contains('ocr_carea')) className = 'ocr_carea';
    else if (node.classList.contains('ocr_par')) className = 'ocr_par';
    else if (node.classList.contains('ocr_line')) className = 'ocr_line';
    else if (node.classList.contains('ocrx_word')) className = 'ocrx_word';

    if (className) {
        // Words are leaves in the rect tree; every other level gets its own group.
        if (className !== 'ocrx_word') {
            var groupNode = Util.createSvgElem('g', {'class': className});
            parentRectsNode.appendChild(groupNode);
            parentRectsNode = groupNode;
        }

        options = this.inheritOptions(this.getNodeOptions(node), options);

        if (options.bbox) {
            if (className === 'ocrx_word' && options.baselineBbox) {
                var word = node.textContent;

                // Text is placed at the bottom of the box that defined the baseline,
                // shifted by the baseline's second value, and stretched to the word's width.
                var textNode = Util.createSvgElem('text', {
                    'x': options.bbox[0],
                    'y': parseFloat(options.baselineBbox[3]) + parseFloat(options.baseline[1]),
                    'textLength': options.bbox[2] - options.bbox[0],
                    'lengthAdjust': 'spacingAndGlyphs'
                });
                textNode.textContent = word;
                this.layoutWords.appendChild(textNode);
            }

            var rectNode = Util.createSvgElem('rect', {
                'x': options.bbox[0],
                'y': options.bbox[1],
                'width': options.bbox[2] - options.bbox[0],
                'height': options.bbox[3] - options.bbox[1],
                'class': className
            });
            parentRectsNode.appendChild(rectNode);

            rectNode.linkedNode = node;
            node.linkedNode = rectNode;
        }
    }

    var childNode = node.firstElementChild;
    while (childNode) {
        this.renderNodesRecursive(childNode, options, parentRectsNode);
        childNode = childNode.nextElementSibling;
    }
};

/**
 * Parses the `title` attribute of an hOCR element ("name value; name value; ...")
 * into an object. Values may be bare, double-quoted or single-quoted. `bbox`,
 * `baseline` and `scan_res` are split on whitespace into arrays of strings; all
 * other values stay strings.
 *
 * @param {{title: (string|undefined)}} node - Element whose title is parsed.
 * @returns {Object} Map of option name to value; empty when there is no title.
 */
HocrProofreader.prototype.getNodeOptions = function (node) {
    var asArray = ['bbox', 'baseline', 'scan_res'];
    var optionsStr = node.title ? node.title : '';
    var regex = /(?:^|;)\s*(\w+)\s+(?:([^;"']+?)|"((?:\\"|[^"])+?)"|'((?:\\'|[^'])+?)')\s*(?=;|$)/g;
    var options = {};
    var match;

    while ((match = regex.exec(optionsStr)) !== null) {
        var name = match[1];
        // Groups 4 / 3 / 2 hold the single-quoted / double-quoted / bare value.
        var value = match[4] || match[3] || match[2];

        if (asArray.indexOf(name) !== -1) value = value.split(/\s+/);

        options[name] = value;
    }

    return options;
};

/**
 * Completes `options` with inheritable values from `parentOptions` (baseline,
 * baselineBbox, x_fsize, scan_res) where the element does not define them itself.
 * When the element has both a baseline and a bbox, that bbox is recorded as
 * `baselineBbox` so descendants know which box the baseline refers to.
 *
 * @param {Object} options - The element's own options; modified in place.
 * @param {?Object} parentOptions - Options of the parent element, if any.
 * @returns {Object} The same `options` object.
 */
HocrProofreader.prototype.inheritOptions = function (options, parentOptions) {
    var inheritableOptions = ['baseline', 'baselineBbox', 'x_fsize', 'scan_res'];

    if ('baseline' in options && 'bbox' in options) options.baselineBbox = options.bbox;

    if (parentOptions) {
        for (var name in parentOptions) {
            if (inheritableOptions.indexOf(name) === -1) continue;
            if (name in options) continue;
            options[name] = parentOptions[name];
        }
    }

    return options;
};

/**
 * Updates the hover highlight for the element under the pointer and scrolls its
 * counterpart in the other view into sight.
 *
 * @param {?Element} target - Element under the pointer, or null when the pointer left.
 * @param {boolean} [isEditorContainer] - True when `target` comes from the editor
 *     iframe; hovering a different page there switches the layout to that page.
 */
HocrProofreader.prototype.onHover = function (target, isEditorContainer) {
    if (target === this.hoveredNode) return;

    if (this.hoveredNode) {
        this.hoverTreeNodes(this.hoveredNode, false);
        this.hoverTreeNodes(this.hoveredNode.linkedNode, false);
        this.hoveredNode = null;
    }

    if (isEditorContainer) {
        // Find the page the hovered editor element belongs to.
        var pageNode = target;
        while (pageNode && !pageNode.classList.contains('ocr_page')) {
            pageNode = pageNode.parentElement;
        }
        if (pageNode && pageNode !== this.currentPage) {
            this.renderPage(pageNode);
        }
    }

    // Read the link only now: rendering another page rebuilds all links.
    var linkedNode = target && target.linkedNode;
    if (linkedNode) {
        this.hoverTreeNodes(target, true);
        this.hoverTreeNodes(linkedNode, true);
        this.hoveredNode = target;

        var linkedContainer = isEditorContainer
            ? this.layoutContainer
            : this.editorIframe.contentDocument.documentElement;
        this.scrollIntoViewIfNeeded(linkedNode, linkedContainer);
    }
};

/**
 * Adds or removes the `hover` class on `node` and its ancestors, stopping before the
 * first ancestor carrying the `ocr_page` or `rects` class.
 *
 * @param {?Element} node - Starting element; null is a no-op.
 * @param {boolean} isActive - True to add the class, false to remove it.
 */
HocrProofreader.prototype.hoverTreeNodes = function (node, isActive) {
    while (node) {
        if (node.classList.contains('ocr_page') || node.classList.contains('rects')) break;

        if (isActive) node.classList.add('hover');
        else node.classList.remove('hover');

        node = node.parentElement;
    }
};

/**
 * Smoothly scrolls `node` into view when it sticks out of `scrollParentNode`.
 *
 * Each axis is handled only if the node fits into the container on that axis. The
 * vertical alignment is passed as `block` and the horizontal alignment as `inline`,
 * in a single scrollIntoView call, so a horizontal correction no longer forces a
 * vertical jump and the two corrections cannot cancel each other.
 *
 * @param {Element} node - Element to reveal.
 * @param {Element} scrollParentNode - Scrolling container (or a document's root element).
 */
HocrProofreader.prototype.scrollIntoViewIfNeeded = function (node, scrollParentNode) {
    var rect = node.getBoundingClientRect();
    // A root element has no parentElement; its content origin is taken as the viewport origin.
    var parentRect = scrollParentNode.parentElement
        ? scrollParentNode.getBoundingClientRect()
        : {left: 0, top: 0};

    // Node position in the container's scroll coordinates.
    var nodeRect = {
        left: rect.left - parentRect.left + scrollParentNode.scrollLeft,
        top: rect.top - parentRect.top + scrollParentNode.scrollTop,
        right: rect.right - parentRect.left + scrollParentNode.scrollLeft,
        bottom: rect.bottom - parentRect.top + scrollParentNode.scrollTop
    };

    var block = null;
    if (nodeRect.bottom - nodeRect.top <= scrollParentNode.clientHeight) {
        if (nodeRect.bottom > scrollParentNode.scrollTop + scrollParentNode.clientHeight) {
            block = 'end';
        } else if (nodeRect.top < scrollParentNode.scrollTop) {
            block = 'start';
        }
    }

    var inline = null;
    if (nodeRect.right - nodeRect.left <= scrollParentNode.clientWidth) {
        if (nodeRect.right > scrollParentNode.scrollLeft + scrollParentNode.clientWidth) {
            inline = 'end';
        } else if (nodeRect.left < scrollParentNode.scrollLeft) {
            inline = 'start';
        }
    }

    if (!block && !inline) return;

    node.scrollIntoView({
        behavior: 'smooth',
        block: block || 'nearest',
        inline: inline || 'nearest'
    });
};