MediaWiki:Gadget-OCR-tools.js
Замечание: Возможно, после публикации вам придётся очистить кэш своего браузера, чтобы увидеть изменения.
- Firefox / Safari: Удерживая клавишу Shift, нажмите на панели инструментов Обновить либо нажмите Ctrl+F5 или Ctrl+R (⌘+R на Mac)
- Google Chrome: Нажмите Ctrl+Shift+R (⌘+Shift+R на Mac)
- Internet Explorer / Edge: Удерживая Ctrl, нажмите Обновить либо нажмите Ctrl+F5
- Opera: Нажмите Ctrl+F5.
/**
 * Toolbar action: normalise whitespace and re-join broken lines.
 *
 * With a non-empty selection only the selected text is cleaned up and
 * replaced in place. Otherwise the whole textbox content is processed
 * (after dropping a leading and a trailing <noinclude> block, when the
 * content both starts and ends with one) and written back.
 */
function ocr_1() {
  var $box = $('#wpTextbox1');
  var selected = $box.textSelection('getSelection');

  if (selected !== '') {
    // Selection mode: leading/trailing line feeds are kept (r = 0).
    var cleanedSelection = ocr_1_inner(selected, 0);
    $box.textSelection('encapsulateSelection', {pre: cleanedSelection, replace: true});
    return;
  }

  // Whole-page mode. The <noinclude> stripping is the fix of 25-02-2017 by @Lozman.
  var whole = $box.textSelection('getContents');
  whole = whole.replace(/^<noinclude>[\s\S]+?<\/noinclude>([\s\S]+)<noinclude>[\s\S]+?<\/noinclude>$/g, '$1');
  // r = 1: also strip line feeds at the very start and end of the text.
  var cleanedPage = ocr_1_inner(whole, 1);
  $box.textSelection('setContents', cleanedPage);
}
/**
 * Clean up OCR text: trim and collapse horizontal whitespace, undo
 * end-of-line hyphenation and join the lines of each paragraph.
 *
 * Paragraph breaks (two or more consecutive line feeds) are preserved.
 *
 * @param {string} sel Text to process.
 * @param {*} r Truthy to additionally strip line feeds at the very start
 *     and very end of the text.
 * @return {string} The processed text.
 */
function ocr_1_inner(sel, r) {
  sel = sel.replace(/[ \t]+$/gm, ''); // remove trailing spaces on every line
  sel = sel.replace(/^[ \t]+/gm, ''); // remove leading spaces on every line
  sel = sel.replace(/[ \t]+/g, ' '); // collapse runs of spaces/tabs
  // Join lines split at a hyphen, "not" sign or soft hyphen. The first
  // character of the next line is only looked at (lookahead), not consumed,
  // so it can itself start the next join; otherwise consecutive
  // one-character lines would be merged only pairwise.
  sel = sel.replace(/(\S)[\-¬\u00AD]\n(?=\S)/g, '$1');
  // Join the remaining lines of a paragraph with a single space.
  sel = sel.replace(/(\S)\n(?=\S)/g, '$1 ');
  if (r) {
    // Strip line feeds at the start and end of the whole text (no "m" flag,
    // so ^ and $ only match the string boundaries).
    sel = sel.replace(/^\n+/, '').replace(/\n+$/, '');
  }
  return sel;
}
/**
 * Toolbar action: wrap the first word of the page text in the
 * {{Перенос2}} template (see ocr_2_inner) and write the result back.
 *
 * Always works on the whole textbox content; a leading and a trailing
 * <noinclude> block are dropped first when the content both starts and
 * ends with one.
 */
function ocr_2() {
  var textArea = $('#wpTextbox1');
  // Declared locally: previously this assignment created an implicit global.
  var txt = textArea.textSelection('getContents');
  txt = txt.replace(/^<noinclude>[\s\S]+?<\/noinclude>([\s\S]+)<noinclude>[\s\S]+?<\/noinclude>$/g, '$1'); // Fix 25-02-2017 by @Lozman
  txt = ocr_2_inner(txt);
  textArea.textSelection('setContents', txt);
}
/**
 * Wrap the first word of the text in {{Перенос2|…|word}}.
 *
 * The word must sit at the very start of the text, optionally preceded
 * by line feeds (which are kept in front of the template). Text that
 * starts with anything else is returned unchanged.
 *
 * @param {string} sel Text to process.
 * @return {string} The processed text.
 */
function ocr_2_inner(sel) {
  var LINE_FEED = '\n';
  var PLACEHOLDER = '\x01';

  // Line feeds are swapped for a placeholder so that "^" in the multi-line
  // pattern below cannot match after each of them.
  var flattened = sel.split(LINE_FEED).join(PLACEHOLDER);
  var wrapped = flattened.replace(
    /^(\x01*)([\-a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ]+)/gm,
    '$1{{Перенос2|…|$2}}'
  );
  // Put the line feeds back.
  return wrapped.split(PLACEHOLDER).join(LINE_FEED);
}
/**
 * Toolbar action: wrap the last word of the page text in the
 * {{Перенос}} template (see ocr_3_inner) and write the result back.
 *
 * Always works on the whole textbox content; a leading and a trailing
 * <noinclude> block are dropped first when the content both starts and
 * ends with one.
 */
function ocr_3() {
  var textArea = $('#wpTextbox1');
  // Declared locally: previously this assignment created an implicit global.
  var txt = textArea.textSelection('getContents');
  txt = txt.replace(/^<noinclude>[\s\S]+?<\/noinclude>([\s\S]+)<noinclude>[\s\S]+?<\/noinclude>$/g, '$1'); // Fix 25-02-2017 by @Lozman
  txt = ocr_3_inner(txt);
  textArea.textSelection('setContents', txt);
}
/**
 * Wrap a trailing word in {{Перенос|word|…}}.
 *
 * A word qualifies when it is followed (after an optional hyphen, "not"
 * sign or soft hyphen, which is dropped) either by the end of the text or
 * by optional line feeds and a "<noinclude>" tag. The word must end with
 * a letter or digit; hyphens are allowed inside it.
 *
 * @param {string} sel Text to process.
 * @return {string} The processed text.
 */
function ocr_3_inner(sel) {
  var LINE_FEED = '\n';
  var PLACEHOLDER = '\x01';

  // Line feeds are swapped for a placeholder so that "$" in the multi-line
  // pattern below cannot match before each of them.
  var flattened = sel.split(LINE_FEED).join(PLACEHOLDER);
  var wrapped = flattened.replace(
    /([\-a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ]*[a-zA-Z0-9А-Яа-яёЁІіѢѣѲѳѴѵ])[-¬\u00AD]?(\x01*<noinclude>|$)/gm,
    '{{Перенос|$1|…}}$2'
  );
  // Put the line feeds back.
  return wrapped.split(PLACEHOLDER).join(LINE_FEED);
}
/**
 * Toolbar action: wrap text in the {{ВАР}} template, which holds two
 * copies of it (see ocr_4_inner), then select the second copy.
 *
 * With a non-empty selection only the selection is wrapped in place.
 * Otherwise the whole textbox content is wrapped (after dropping a leading
 * and a trailing <noinclude> block, when the content both starts and ends
 * with one) and written back.
 */
function ocr_4() {
  var textArea = $('#wpTextbox1');
  // Offset of the wrapped text inside the textbox: 0 when the whole content
  // is replaced, the selection start when only the selection is replaced.
  var base = 0;
  // Length of the text before wrapping (was an implicit global before).
  var len;
  var txt = textArea.textSelection('getSelection');
  if (txt === '') {
    txt = textArea.textSelection('getContents');
    txt = txt.replace(/^<noinclude>[\s\S]+?<\/noinclude>([\s\S]+)<noinclude>[\s\S]+?<\/noinclude>$/g, '$1'); // Fix 25-02-2017 by @Lozman
    len = txt.length;
    txt = ocr_4_inner(txt);
    textArea.textSelection('setContents', txt);
  } else {
    // Must be read before the selection is replaced.
    base = textArea.textSelection('getCaretPosition', {startAndEnd: true})[0];
    len = txt.length;
    txt = ocr_4_inner(txt);
    textArea.textSelection('encapsulateSelection', {pre: txt, replace: true});
  }
  // ocr_4_inner() ends its result with the second copy followed by '\n}}'
  // (3 characters). Measuring from the end keeps the start offset correct
  // whatever separator is placed between the two copies; the previous
  // formula assumed a 2-character separator and produced a fractional,
  // misplaced offset with the current one.
  var suffixLength = 3;
  var n1 = base + txt.length - suffixLength - len;
  // As before, the range ends one character past the second copy, i.e. it
  // includes the line feed in front of '}}'.
  var n2 = base + txt.length - (suffixLength - 1);
  textArea.textSelection('setSelection', {start: n1, end: n2});
  textArea.textSelection('scrollToCaretPosition');
}
/**
 * Build a {{ВАР}} template call containing the given text twice.
 *
 * The two copies are separated by "|"; the line feeds around the
 * separator are placed inside HTML comments, and a plain line feed
 * precedes the closing braces.
 *
 * @param {string} sel Text to duplicate.
 * @return {string} The template wikitext.
 */
function ocr_4_inner(sel) {
  var separator = '<!--\n-->|<!--\n-->';
  var firstHalf = '{{ВАР|' + sel;
  var secondHalf = sel + '\n}}';
  return firstHalf + separator + secondHalf;
}
// Value of the wiki's 'wgContentLanguage' config variable, read once when the
// script loads; ocr_5() sends it as the 'lang' query parameter.
var lang = mw.config.get( 'wgContentLanguage' );
/**
 * Lock or unlock the editor while an OCR request is running.
 *
 * Locking disables the textbox, removes the click handlers of the OCR
 * button and lets the user press ESC to unlock again. Unlocking re-enables
 * the textbox and re-attaches ocr_5 to the button.
 *
 * The function is safe to call repeatedly: the ESC handler is namespaced
 * and always removed first, and ocr_5 is detached before being attached.
 * Previously every lock added one more permanent keyup handler, and every
 * extra unlock (e.g. ESC followed by the request callback) added one more
 * click handler, so a single click could start several requests.
 *
 * @param {boolean} set true to lock, false to unlock.
 */
function ocr5_disable_input(set)
{
  var escEvent = 'keyup.ocr5';
  var button = $('#ocr_tools_5');

  // Drop any ESC handler left over from an earlier lock.
  $(document).off(escEvent);

  if (set) {
    $(document).on(escEvent, function (e) {
      // You can press ESC to kill the query
      if (e.which === 27) { ocr5_disable_input(false); }
    });
    button.off('click');
  } else {
    button.off('click', ocr_5).on('click', ocr_5);
  }
  $('#wpTextbox1').prop('disabled', set);
}
/**
 * Success handler of the hOCR request started by ocr_5().
 *
 * On a reply without an error, and only if the textbox is still disabled
 * (i.e. the user has not cancelled with ESC), the returned hOCR markup is
 * cached in localStorage.ws_hOCR, reduced to plain text, its line feeds are
 * normalised and the result is put into the textbox.
 *
 * The editor is unlocked in every case, including when something in here
 * throws; previously an exception left the textbox disabled.
 *
 * @param {Object} data Parsed JSON reply; reads data.error and data.text.
 */
function ocr5_callback (data) {
  try {
    if (data.error) {
      /*do_ocr();*/ // Do not fallback to tesseract for Russian Wikisource!
      return;
    }
    // Checking if tb is disabled is required with chrome as ESC doesn't kill
    // the query.
    var tb = document.getElementById("wpTextbox1");
    if (tb.disabled) {
      try {
        localStorage.ws_hOCR = data.text;
      } catch (e) {
        // Storage may be full or unavailable; the cache is optional, so the
        // text is still inserted.
        mw.log.warn('OCR tools: could not cache hOCR in localStorage', e);
      }
      var text = $(data.text).text();
      // Line feed normalisation, in this exact order: strip spaces around
      // line feeds, protect runs of four, halve doubles, turn the protected
      // runs into doubles, then reduce triples to doubles.
      text = text.replace(/[ ]*\n[ ]*/g, '\n')
        .replace(/\n\n\n\n/g, '@_@_@_@_@_@')
        .replace(/\n\n/g, '\n')
        .replace(/@_@_@_@_@_@/g, '\n\n')
        .replace(/\n\n\n/g, '\n\n');
      tb.value = text.trim();
    }
  } finally {
    ocr5_disable_input(false);
  }
}
/**
 * Toolbar action: request the hOCR text layer of the current page from the
 * phetools service and hand the reply to ocr5_callback().
 *
 * The editor is locked for the duration of the request. All query values
 * are URL-encoded (user names and language codes were previously inserted
 * raw, which breaks the query for names containing spaces, "&" or
 * non-ASCII characters), and a failed request unlocks the editor again
 * instead of leaving the textbox disabled.
 */
function ocr_5 () {
  ocr5_disable_input(true);
  var request_url = '//tools.wmflabs.org/phetools/hocr_cgi.py?cmd=hocr&book='
    + encodeURIComponent(mw.config.get('wgTitle'))
    + '&lang=' + encodeURIComponent(lang)
    + '&user=' + encodeURIComponent(mw.config.get('wgUserName'));
  $.getJSON(request_url)
    .done(ocr5_callback)
    .fail(function () {
      // Network or parse error: nothing to insert, just unlock.
      ocr5_disable_input(false);
    });
}
/**
 * Debug helper: replace the whole textbox content with the string 'test'.
 *
 * The former read-and-strip of the current content was dead code (its
 * result was overwritten immediately) and assigned to an undeclared
 * variable, creating an implicit global; both are removed.
 */
function ocr_test () {
  var textArea = $('#wpTextbox1');
  textArea.textSelection('setContents', 'test');
}
/**
 * Toolbar action: remove all spaces and tabs from the selected text.
 *
 * Does nothing when the selection is empty.
 */
function ocr_6() {
  var $box = $('#wpTextbox1');
  var selected = $box.textSelection('getSelection');
  if (selected === '') {
    // nothing is selected
    return;
  }
  // The second argument is passed as before; ocr_6_inner declares only one
  // parameter.
  var squeezed = ocr_6_inner(selected, 0);
  $box.textSelection('encapsulateSelection', {pre: squeezed, replace: true});
}
/**
 * Delete every space and tab character from the text; line feeds and all
 * other characters are kept.
 *
 * @param {string} sel Text to process.
 * @return {string} The text without spaces and tabs.
 */
function ocr_6_inner(sel) {
  var blanks = /[ \t]+/gm;
  return sel.replace(blanks, '');
}