Utente:LeDeuxiemeTexte/GoogleOCRFrench.js
(Redirect 'a Utente:George2etexte/GoogleOCRFrench.js)
Notarella: Aroppo pubbreca putisse necessità 'e pulezzà 'a caché d' 'o navigatóre pe vedé 'e cagnamienti.
- Firefox / Safari: Sprémme 'o buttóne maiuscole e ffà clic ncopp'a Recarreca, o pure spremme Ctrl-F5 o Ctrl-R (⌘-R ncopp'a Mac)
- Google Chrome: spremme Ctrl-Shift-R (⌘-Shift-R ncopp'a nu Mac)
- Internet Explorer/edge: Spremme 'o buttóne Ctrl pe' tramente ca faie click ncopp'a Refresh, o pure spremmere Ctrl-F5
- Opera: Vaje addò 'o Menu → Mpustaziune (Opera → Mpustaziune ncopp' 'o Mac) e po' ncopp'a Privacy & sicurezza → Pulezza date d' 'o browser → Immaggene e file d' 'a cache.
/*jshint boss:true*/
/*global $, mw*/
/*
Original script from https://nap.wikisource.org/wiki/Utente:Alex_brollo/GoogleOCR.js
To use it: https://nap.wikisource.org/w/index.php?title=User:George2etexte/GoogleOCRFrench.js&action=raw&ctype=text/javascript
Added some code below to post-process texts in French
*/
/**
* This script adds a toolbar button that replaces the editbox text with OCR text
* derived by sending the .prp-page-image image through Google's Vision API.
*
* For more information, see https://wikisource.org/wiki/Wikisource:Google_OCR
*/
( function ( mw, $ ) {
// Endpoint of the OCR tool, the spinner image, and the interface messages
// that must be loaded before the button can be shown.
var toolUrl = "//tools.wmflabs.org/ws-google-ocr/api.php";
var loadingGifUrl = '//upload.wikimedia.org/wikipedia/commons/4/42/Loading.gif';
var sysMessages = [
	'google-ocr-button-label',
	'google-ocr-request-in-progress',
	'google-ocr-no-text',
	'google-ocr-image-not-found'
];
// Language passed to the OCR tool. The three minor Italian Wikisources
// (content languages nap, vec and pms) are sent as "it" instead.
var lang = mw.config.get( 'wgContentLanguage' );
switch ( lang ) {
	case "nap":
	case "vec":
	case "pms":
		lang = "it";
		break;
}
/**
* The initialisation function, run on every load. Adds the OCR button to the
* toolbar if we're currently editing or previewing in the Page namespace and
* one of the two editing toolbars is enabled in the user's preferences.
*
* Nothing is returned; the work happens in module-loader callbacks.
*/
function run() {
	mw.loader.using( 'user.options', function () {
		var isPage = mw.config.get( 'wgCanonicalNamespace' ) === 'Page';
		// Preference values can arrive as the string "1" as well as the
		// number 1, so normalise them before comparing.
		var useOldToolbar = Number( mw.user.options.get( 'showtoolbar' ) ) === 1;
		var useBetaToolbar = Number( mw.user.options.get( 'usebetatoolbar' ) ) === 1;
		var toolbarLib;

		if ( !isPage || !( useOldToolbar || useBetaToolbar ) ) {
			return;
		}

		// The WikiEditor toolbar wins when both preferences are set.
		toolbarLib = useBetaToolbar ? 'ext.wikiEditor' : 'mediawiki.toolbar';
		mw.loader.using( [ 'mediawiki.api', toolbarLib ], function () {
			// The button label is a system message, so fetch it first.
			new mw.Api().loadMessagesIfMissing( sysMessages ).then( function () {
				customizeToolbar( useBetaToolbar );
			} );
		} );
	} );
}
/**
* Put the OCR button on the toolbar. Called from run, which has already
* decided that a button is wanted, so no eligibility checks happen here.
*
* @param {boolean} useBeta Whether the WikiEditor toolbar should be used.
*/
function customizeToolbar( useBeta ) {
	var legacyButton, wikiEditorTool, preloadedGif;

	if ( useBeta ) {
		// New-style WikiEditor toolbar: register the tool once the DOM is ready.
		$( document ).ready( function () {
			wikiEditorTool = {
				// Alternative placement considered: 'proofreadpage-tools' / 'other'.
				section: 'main',
				group: 'insert',
				tools: {
					'GoogleOcr': {
						type: 'button',
						icon: 'https://upload.wikimedia.org/wikipedia/commons/b/bd/GoogleOcr_WikiEditor_button.png',
						labelMsg: 'google-ocr-button-label',
						action: { type: 'callback', execute: doOcr }
					}
				}
			};
			$( "#wpTextbox1" ).wikiEditor( 'addToToolbar', wikiEditorTool );
			$( "a[rel='GoogleOcr']" ).css( "width", "42px" );
		} );
	} else if ( mw.toolbar ) {
		// Old-style toolbar: add the image button, then wire up its click handler.
		legacyButton = {
			imageFile: 'https://upload.wikimedia.org/wikipedia/commons/c/ca/GoogleOcr_toolbar_button.png',
			speedTip: mw.msg( 'google-ocr-button-label' ),
			imageId: 'GoogleOcrButton'
		};
		mw.toolbar.addButton( legacyButton );
		$( "img#GoogleOcrButton" ).on( 'click', doOcr ).css( "width", "50px" );
	}

	// Pre-load the loading gif by attaching a hidden copy to the page.
	preloadedGif = $( '<img />' ).attr( 'src', loadingGifUrl );
	preloadedGif.appendTo( 'body' ).hide();
}
/**
* This function is run when the OCR button is clicked. It sends the page
* image's URL to the API and hands the outcome to processOcrResult, which
* replaces the editbox's text with the result.
*
* If the page has no .prp-page-image image, a notification is shown and no
* request is made.
*/
function doOcr() {
	var $image = $( '.prp-page-image img' );
	var imageUrl = $image.length === 0 ? undefined : $image.attr( 'src' );
	var requestUrl;

	if ( !imageUrl ) {
		mw.notify( mw.msg( 'google-ocr-image-not-found' ) );
		// Without an image there is nothing to OCR, so stop here.
		return;
	}

	// Send the HTTPS URL because this will be accessed by PHP in the tool.
	// Only protocol-relative URLs need the scheme; absolute ones are kept.
	if ( imageUrl.indexOf( '//' ) === 0 ) {
		imageUrl = 'https:' + imageUrl;
	}

	showLoadingMsg( 'google-ocr-request-in-progress' );
	// Encode the values so that characters such as & or % inside the image
	// URL cannot be mistaken for part of the query string.
	requestUrl = toolUrl +
		"?image=" + encodeURIComponent( imageUrl ) +
		"&lang=" + encodeURIComponent( lang );
	$.getJSON( requestUrl )
		.done( processOcrResult )
		.fail( processOcrResult ) // Same handler, for simplicity.
		.always( function () { showLoadingMsg( '' ); } );
}
/**
* The API result (either the OCR'd text, or an error message) is processed by
* this function. It is registered for both the success and the failure of the
* request, so it has to cope with either shape of argument.
*
* On error a notification is shown; otherwise the text is post-processed for
* French and written into the #wpTextbox1 edit box.
*
* @param {Object} response Either the decoded JSON body (with a `text`
*  property) or the request object (with `responseJSON.error`).
*/
function processOcrResult( response ) {
	var apiError = response && response.responseJSON && response.responseJSON.error;
	if ( apiError ) {
		mw.notify( mw.msg( 'error' ) + ' ' + apiError.code + ' ' + apiError.message );
		return;
	}
	if ( !response || typeof response.text !== 'string' || response.text.length === 0 ) {
		mw.notify( mw.msg( 'google-ocr-no-text' ) );
		return;
	}
	$( '#wpTextbox1' ).val( postProcessFrenchText( response.text ) );
}

/**
* Apply the French-specific clean-up heuristics to raw OCR text.
*
* @param {string} text The text returned by the OCR tool.
* @return {string} The cleaned-up text.
*/
function postProcessFrenchText( text ) {
	// Tried in this order, each at most once, against the start of the text.
	var headerPatterns = [
		/^[0-9A-ZÉÈÊËÀÂÄÎÏÌÔÖÒÜÙÛÇ.\-, ]+\n/,
		/^[0-9]+\n/,
		/^[A-Z.\-, ]+\n/,
		/^[0-9]+\n/,
		/^[A-ZÉÈÊËÀÂÄÎÏÌÔÖÒÜÙÛÇ.\-, ]+\n/
	];
	var result = text;
	var i;

	// Replace - in the beginning of a line by — (for dialogues).
	result = result.replace( /\n-([^ ])/g, '\n— $1' ).replace( /\n- /g, '\n— ' );

	// Glue together parts of words cut in the end of a line: pull the rest of
	// the word up to the hyphenated line. The word must not run across a
	// newline; the space after it (if any) becomes the new line break, and
	// otherwise the line break that already follows it is kept.
	result = result.replace( /-[ ]*\n([^ \n]+)( ?)/g, function ( match, wordEnd, space ) {
		return space ? wordEnd + '\n' : wordEnd;
	} );

	// Remove the first lines if they are made only of digits or uppercase
	// characters and punctuations (probably page headers).
	for ( i = 0; i < headerPatterns.length; i++ ) {
		result = result.replace( headerPatterns[ i ], '' );
	}

	// Add a new line if the line is too short (end of a paragraph?). The
	// lookahead leaves the closing newline unconsumed so that several short
	// lines in a row are all handled.
	result = result.replace( /\n(.{2,20})(?=\n)/g, '\n$1\n' );

	return result;
}
/**
* Show (or hide) a loading message. Any message that is already displayed is
* removed first. Pass false or an empty string to remove the message
* altogether.
*
* @param {string|boolean} msgLabel The label of the system message to show,
*  or a falsy value to only remove the current message.
*/
function showLoadingMsg( msgLabel ) {
	var loadingId = 'GoogleOcrLoading';
	var msgBox, loadingGif;

	// Always remove any existing message.
	$( '#' + loadingId ).remove();

	// Nothing more to do when the caller only wanted the message hidden.
	if ( !msgLabel ) {
		return;
	}

	// Add the new message, with the spinner in front of the text.
	msgBox = $( "<p>" )
		.attr( "id", loadingId )
		.css( "background-color", "#efefef" ).css( "border", "1px solid #ccc" )
		.text( mw.message( msgLabel ).plain() );
	loadingGif = $( "<img>" )
		.attr( "src", loadingGifUrl )
		.attr( "alt", "Animated loading indicator" )
		.css( "display", "inline-block" ).css( "margin", "0.3em" );
	msgBox.prepend( loadingGif );
	$( '#wpTextbox1' ).before( msgBox );
}
// Start the initialisation as soon as the script is evaluated.
run();
}( mediaWiki, jQuery ) );