/**
* @file A content script which is injected into the page. Communicates with
* the extension via message passing. This file also serves as the plugin's storage for UI element settings.
* @author Ulrik Schremser
*/
// Lookup table of common words (word -> true). Assigned from loadWordList()
// during start-up and read by annotate() for the word-complexity metric.
var wordList = undefined;
// Each "...Easy" / "...Hard" pair below is handed to map() as its best / worst
// bound: values at or beyond "Easy" score 1.0, values at or beyond "Hard" score 0.0.
// Sentence length is measured in whitespace-separated words.
var averageSentenceLengthEasy = 12.0;
var averageSentenceLengthHard = 42.0;
/*
NOTE(review): presumably the literature source for some of the thresholds in
this section -- confirm which values it backs.
@book{biber1998corpus,
title={Corpus linguistics: Investigating language structure and use},
author={Biber, Douglas and Conrad, Susan and Reppen, Randi},
year={1998},
publisher={Cambridge University Press},
pages={68}
}
*/
// Average word length: alphanumeric characters per word.
var averageAverageWordLengthEasy = 3.8;
var averageAverageWordLengthHard = 8.2;
// Sentence complexity: clause splitters (commas, relative pronouns, ...) per word.
var averageSentenceComplexityEasy = 0.05;
var averageSentenceComplexityHard = 0.15;
// Word complexity: share of words found in wordList. Here "Easy" is the LARGER
// bound, so map() treats higher values as better.
var averageWordComplexityEasy = 0.6;
var averageWordComplexityHard = 0.2;
// Noun-to-verb ratio: number of nouns divided by number of verbs.
var averageNtvRatioEasy = 1.2;
var averageNtvRatioHard = 2.2;
// Relative weights of the individual scores. calcCombinedScore() normalises them
// by their sum; the 'update-score' message in listener() overwrites them.
var sentenceLengthInfluence = 3500;
var averageWordLengthInfluence = 1200;
var ntvRatioInfluence = 800;
var sentenceComplexityInfluence = 2000;
var wordComplexityInfluence = 2500;
// UI state flags, toggled by listener() and reported back on 'get-values'.
var enabled = false;
var tooltipEnabled = false;
// Fallback for engines without a native String.prototype.endsWith.
// It must not replace a native implementation: the native method supports the
// optional second argument (endPosition), which this minimal version ignores.
if (typeof String.prototype.endsWith !== 'function') {
    String.prototype.endsWith = function(suffix) {
        // Searching from (length - suffix.length) can only match at the very end.
        return this.indexOf(suffix, this.length - suffix.length) !== -1;
    };
}
// Install a minimal startsWith only when the engine does not provide one.
if (typeof String.prototype.startsWith !== 'function') {
    String.prototype.startsWith = function (prefix) {
        // The string starts with `prefix` exactly when its first occurrence is at index 0.
        var firstOccurrence = this.indexOf(prefix);
        return firstOccurrence === 0;
    };
}
/**
 * Like indexOf(), but searches for a regular expression.
 * @param {RegExp} regex - pattern to look for
 * @param {Number} [startpos] - index at which the search starts (0 when omitted)
 * @returns {Number} - index of the first match relative to the whole string, or -1
 */
String.prototype.regexIndexOf = function(regex, startpos) {
    var offset = startpos || 0;
    var relativeIndex = this.substring(offset).search(regex);
    if (relativeIndex < 0) {
        return relativeIndex;
    }
    // search() ran on the tail of the string; translate back to an absolute index.
    return relativeIndex + offset;
};
/**
 * Determines the sentences contained in a DOM node by simple means: every
 * '.', '?' or '!' inside a direct text-node child ends a sentence. The children
 * of each sentence are moved into a newly created <span class="sentence">
 * (text nodes are split where necessary) and the list of these spans is returned.
 * Only direct children of nodeRoot are inspected; nested elements are moved as
 * a whole into the sentence that is currently open.
 * @param {Node} nodeRoot - the root node in which to look for sentences. It has to contain only complete sentences.
 * @returns {Array} - Returns an array of spans which wrap the Nodes of sentences.
 * @memberof content.js
 */
function retrieveRawSentenceDoms(nodeRoot) {
    var sentenceEnd = /[.?!]/;
    var rawObjectSentenceDoms = [];
    // Span collecting the nodes of the sentence currently being built;
    // undefined while no sentence is open.
    var openSpan = undefined;

    // Moves `child` into the open sentence span. If no sentence is open, a new
    // span is created at the child's position first. Returns the span used.
    function wrap(child) {
        if (!openSpan) {
            openSpan = document.createElement("span");
            openSpan.className = "sentence";
            rawObjectSentenceDoms.push(openSpan);
            nodeRoot.insertBefore(openSpan, child);
        }
        // appendChild() detaches the child from nodeRoot before re-inserting it.
        openSpan.appendChild(child);
        return openSpan;
    }

    var node = nodeRoot.firstChild;
    while (node) {
        // nodeType 3 is a text node. These need special care because one text
        // node might consist of multiple sentences.
        if (node.nodeType === 3 && node.nodeValue.length !== 0) {
            // Cut off and wrap every complete sentence.
            var index = node.nodeValue.search(sentenceEnd);
            while (index >= 0) {
                var remainder = node.splitText(index + 1);
                wrap(node);
                // The sentence sign closes the sentence.
                openSpan = undefined;
                node = remainder;
                index = node.nodeValue.search(sentenceEnd);
            }
            // Whatever is left starts (or continues) a not yet completed sentence.
            if (node.nodeValue.length !== 0) {
                // Continue the traversal from the span: the wrapped node no
                // longer has siblings inside nodeRoot.
                node = wrap(node);
            }
        }
        // Everything else (elements, empty text nodes, ...) joins the open sentence.
        else {
            node = wrap(node);
        }
        node = node.nextSibling;
    }
    // TODO: Force ending of sentence if no sentence sign was found.
    return rawObjectSentenceDoms;
}
/**
 * Post-processing step for retrieveRawSentenceDoms(): glues wrongly split
 * sentence fragments back onto the sentence in front of them.
 * A span is kept as its own sentence only if it is the first span or its text
 * starts with a blank followed by an upper-case letter or a digit. Leading SUP
 * elements are always handed to the previous sentence.
 * @param {Array} rawObjectSentenceDoms - The output of retrieveRawSentenceDoms().
 * @param {Node} rootNode - the root node in which the elements of rawObjectSentenceDoms lie
 * @returns {Array} - merged sentences. Same format as rawObjectSentenceDoms.
 * @memberof content.js
 */
function mergeRawSentenceDoms(rawObjectSentenceDoms, rootNode){
    var startsLikeSentence = /^ [A-Z0-9].*/;
    var merged = [];
    rawObjectSentenceDoms.forEach(function(span, position){
        // The very first span has no predecessor it could be merged into.
        if(position === 0){
            merged.push(span);
            return;
        }
        var previous = merged[merged.length - 1];
        // Leading SUP elements belong to the previous sentence and would
        // otherwise mess up the sentence formatting.
        while(span.childNodes.length >= 1 && span.childNodes[0].nodeName === "SUP"){
            previous.appendChild(span.removeChild(span.childNodes[0]));
        }
        if(startsLikeSentence.test($(span).text())){
            merged.push(span);
            return;
        }
        // The span does not start like a sentence should: hand all of its
        // children to the previous sentence and drop the emptied wrapper.
        while(span.childNodes.length !== 0){
            previous.appendChild(span.removeChild(span.childNodes[0]));
        }
        rootNode.removeChild(span);
    });
    return merged;
}
/**
 * Splits the article text into sentences, calculates all metrics for every
 * sentence and injects the tooltip markup.
 *
 * For each sentence span the following jQuery data entries are written:
 * 'plaintext', 'sentence-length', 'average-word-length', 'ntv-ratio',
 * 'sentence-complexity', 'word-complexity', the matching '...-score' entries
 * (each mapped to 0.0 - 1.0 by map()) and the combined 'score'.
 * The global wordList must have been loaded before calling this function.
 * @returns {Array} - The fully annotated sentences as DOM in wrapper span-elements in an Array.
 * @memberof content.js
 */
function annotate(){
    var sentenceDoms = [];
    $('#mw-content-text p, #mw-content-text li, #mw-content-text dd')
        .not("#toc p, #toc li, #toc dd") // Exclude index
        .not("div.reflist p, div.reflist li, div.reflist dd") // Exclude references
        .each(function() {
            var rawObjectSentenceDoms = retrieveRawSentenceDoms(this);
            var mergedObjectSentenceDoms = mergeRawSentenceDoms(rawObjectSentenceDoms, this);
            for(var i = 0; i < mergedObjectSentenceDoms.length; i++){
                sentenceDoms.push(mergedObjectSentenceDoms[i]);
            }
        });

    // The header row only consists of static icon URLs, so it is built once
    // instead of once per sentence.
    var tooltipHeaderHtml = buildTooltipHeaderHtml();

    for(var s = 0; s < sentenceDoms.length; s++){
        // Wrap the span once; every data()/append() call below reuses the wrapper.
        var $sentence = $(sentenceDoms[s]);

        // PLAINTEXT (reference markers such as "[12]" are removed)
        var plainText = $sentence.text().replace(/\[\d+\]/g, '').trim();
        $sentence.data('plaintext', plainText);

        // SENTENCE-LENGTH IN WORDS
        // Note: split() yields at least one element, so the divisions below
        // never divide by zero.
        var sentenceLength = plainText.split(/\s+/).length;
        var sentenceLengthScore = map(averageSentenceLengthEasy, sentenceLength, averageSentenceLengthHard);
        $sentence.data('sentence-length', sentenceLength);
        $sentence.data('sentence-length-score', sentenceLengthScore);

        // AVERAGE WORD LENGTH (alphanumeric characters per word)
        var alphanumericLength = plainText.replace(/[^a-zA-Z0-9]/g, '').length;
        var averageWordLength = alphanumericLength / sentenceLength;
        var averageWordLengthScore = map(averageAverageWordLengthEasy, averageWordLength, averageAverageWordLengthHard);
        $sentence.data('average-word-length', averageWordLength);
        $sentence.data('average-word-length-score', averageWordLengthScore);

        // NOUN TO VERB RATIO (FOR NOMINAL FORMS)
        // Tag the sentence once and query both word classes from the result.
        // NOTE(review): assumes nouns()/verbs() are read-only accessors of the
        // tagged sentence -- confirm against the nlp library in use.
        var tagged = nlp.pos(plainText);
        // The tiny lower bound avoids a division by zero for sentences without verbs.
        var ntvRatio = tagged.nouns().length / Math.max(0.00000000000001, tagged.verbs().length);
        var ntvRatioScore = map(averageNtvRatioEasy, ntvRatio, averageNtvRatioHard);
        $sentence.data('ntv-ratio', ntvRatio);
        $sentence.data('ntv-ratio-score', ntvRatioScore);

        // SENTENCE COMPLEXITY (clause splitters per word; first and last word are ignored)
        var filteredPlainText = plainText
            .replace(/[^A-Za-z0-9,\s()]/g, '')
            .replace(/^\s*[A-Za-z0-9]+\s*/, '')
            .replace(/\s*[A-Za-z0-9]+\s*$/, '');
        var sentenceSplitters = filteredPlainText.match(/(who|which|that|whose|,|neither|\s-\s|\()/g);
        var sentenceComplexity = ((sentenceSplitters == null) ? 0 : sentenceSplitters.length) / sentenceLength;
        var sentenceComplexityScore = map(averageSentenceComplexityEasy, sentenceComplexity, averageSentenceComplexityHard);
        $sentence.data('sentence-complexity', sentenceComplexity);
        $sentence.data('sentence-complexity-score', sentenceComplexityScore);

        // WORD COMPLEXITY (share of words contained in the common-word list)
        var wordComplexityWords = plainText.replace(/[^a-zA-Z0-9\s]/g, '').trim().split(/\s+/);
        var inDictCount = 0;
        for(var wi = 0; wi < wordComplexityWords.length; wi++){
            if(wordList[wordComplexityWords[wi].toLowerCase()] === true){
                inDictCount++;
            }
        }
        var wordComplexity = inDictCount / sentenceLength;
        var wordComplexityScore = map(averageWordComplexityEasy, wordComplexity, averageWordComplexityHard);
        $sentence.data('word-complexity', wordComplexity);
        $sentence.data('word-complexity-score', wordComplexityScore);

        // APPEND MOUSEOVER
        $sentence.append('<span class="tooltip"><table>' +
            tooltipHeaderHtml +
            '<tr>' +
            buildTooltipCellHtml(sentenceLengthScore, sentenceLength) +
            buildTooltipCellHtml(averageWordLengthScore, averageWordLength.toFixed(2)) +
            buildTooltipCellHtml(ntvRatioScore, ntvRatio.toFixed(2)) +
            buildTooltipCellHtml(sentenceComplexityScore, sentenceComplexity.toFixed(2)) +
            buildTooltipCellHtml(wordComplexityScore, wordComplexity.toFixed(2)) +
            '</tr>' +
            '</table></span>');

        // CALCULATE COMBINED SCORE AND SET IT
        var score = calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore);
        $sentence.data('score', score);
    }
    return sentenceDoms;
}

/**
 * Builds the static part of the tooltip table: the column definitions and the
 * icon header row.
 * @returns {String} - HTML for the five <col> elements and the header <tr>.
 * @memberof content.js
 */
function buildTooltipHeaderHtml(){
    var icons = ['line-length.png', 'word-length.png', 'nominal-forms.png', 'structure-complexity.png', 'word-complexity.png'];
    var cols = '';
    var headerCells = '';
    for(var i = 0; i < icons.length; i++){
        cols += '<col width="60px" />';
        // chrome.runtime.getURL replaces the deprecated chrome.extension.getURL.
        headerCells += '<th><img src="' + chrome.runtime.getURL(icons[i]) + '"/></th>';
    }
    return cols + '<tr> ' + headerCells + '</tr>';
}

/**
 * Builds one value cell of the tooltip table.
 * @param {Number} score - score between 0.0 and 1.0, turned into the cell's background color
 * @param {String|Number} label - the text shown inside the cell
 * @returns {String} - HTML of the <td> element.
 * @memberof content.js
 */
function buildTooltipCellHtml(score, label){
    return '<td height="40px" style="background-color:' + getColor(score) + ';">' + label + '</td>';
}
/**
 * Calculates the combined score as the weighted average of the given scores.
 * The weights are the global "...Influence" variables, which must already
 * exist; they are normalised by their sum.
 * If the weights sum up to zero or to a non-finite value (for example all
 * sliders at 0, or a weight that could not be parsed), no meaningful weighting
 * exists and the plain, unweighted average of the scores is returned instead
 * of NaN.
 * @param {Number} sentenceLengthScore - score of the sentence length
 * @param {Number} averageWordLengthScore - score of the average word length
 * @param {Number} ntvRatioScore - score of the noun-to-verb ratio
 * @param {Number} sentenceComplexityScore - score of the sentence complexity
 * @param {Number} wordComplexityScore - score of the word complexity
 * @returns {Number} - the combined score
 * @memberof content.js
 */
function calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore){
    var exactSum = averageWordLengthInfluence + sentenceLengthInfluence + ntvRatioInfluence + sentenceComplexityInfluence + wordComplexityInfluence;
    // Guard against 0/0 and NaN weights, which would otherwise turn every score into NaN.
    if(exactSum === 0 || !isFinite(exactSum)){
        return (sentenceLengthScore + averageWordLengthScore + ntvRatioScore + sentenceComplexityScore + wordComplexityScore) / 5;
    }
    return averageWordLengthScore * averageWordLengthInfluence / exactSum +
        sentenceLengthScore * sentenceLengthInfluence / exactSum +
        sentenceComplexityScore * sentenceComplexityInfluence / exactSum +
        wordComplexityScore * wordComplexityInfluence / exactSum +
        ntvRatioScore * ntvRatioInfluence / exactSum;
}
/**
 * Maps a value linearly onto the range 0.0 (worst) to 1.0 (best) and clamps it
 * if it lies outside of the interval spanned by `best` and `worst`.
 * `best` may be smaller or larger than `worst`; in the latter case larger
 * values are the better ones.
 * @param {Number} best - the value which maps to 1.0
 * @param {Number} val - the value to map
 * @param {Number} worst - the value which maps to 0.0
 * @returns {Number} - the mapped value
 * @memberof content.js
 */
function map(best, val, worst){
    var inverted = worst < best;
    // Normalise the interval so that low <= high, remembering the direction.
    var low = inverted ? worst : best;
    var high = inverted ? best : worst;
    if(val < low){
        return inverted ? 0.0 : 1.0;
    }
    if(val > high){
        return inverted ? 1.0 : 0.0;
    }
    var fraction = (val - low) / (high - low);
    if(inverted){
        return fraction;
    }
    return 1 - fraction;
}
/**
 * Calculates a color string for a given value by interpolating linearly
 * between three colors: light red (0.0), white (0.5) and light blue (1.0).
 * Values outside of 0.0 - 1.0 are clamped and a value which is not a number is
 * treated as the neutral 0.5, so the result is always a valid "#rrggbb" string.
 * @param {Float} value - Value between 0.0 and 1.0
 * @returns {String} - The color string specifying a color between red (for 0.0) and blue (for 1.0).
 * @memberof content.js
 */
function getColor(value){
    var colorWorst = [255, 255/2, 255/2];  // for value 0.0
    var colorMiddle = [255, 255, 255];     // intermediate for value 0.5
    var colorBest = [107, 174, 212];       // for value 1.0

    var clamped = isNaN(value) ? 0.5 : Math.min(1, Math.max(0, value));

    var from, to, scale;
    if(clamped <= 0.5){
        // Interpolate between 0.0 and 0.5
        from = colorWorst;
        to = colorMiddle;
        scale = clamped * 2;
    }
    else{
        // Interpolate between 0.5 and 1.0
        from = colorMiddle;
        to = colorBest;
        scale = (clamped - 0.5) * 2;
    }

    var color = '#';
    for(var channel = 0; channel < 3; channel++){
        var intensity = Math.round((1 - scale) * from[channel] + scale * to[channel]);
        // Always emit two hex digits per channel; a single digit would corrupt the color.
        color += ('0' + intensity.toString(16)).slice(-2);
    }
    return color;
}
/**
 * Registers a mousemove listener which keeps all elements of class "tooltip"
 * positioned 20px right of and below the mouse cursor.
 * The tooltip elements are looked up once when this function is called, so it
 * has to run after the tooltips were added to the document.
 * The listener is attached with addEventListener so that an already installed
 * window.onmousemove handler is not replaced.
 * @memberof content.js
 */
function addToolTipMouseCallback(){
    var tooltips = document.querySelectorAll('.tooltip');
    window.addEventListener('mousemove', function (e) {
        var x = (e.clientX + 20) + 'px';
        var y = (e.clientY + 20) + 'px';
        for (var i = 0; i < tooltips.length; i++) {
            tooltips[i].style.top = y;
            tooltips[i].style.left = x;
        }
    });
}
/**
 * Sets the background color of each sentence span according to its combined
 * score (jQuery data entry "score"), or removes the coloring again.
 * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
 * @param {Boolean} show - Should the spans get the score as background color? If not, background-color will be set to "transparent".
 * @memberof content.js
 */
function showColors(sentenceDoms, show){
    for(var i = 0; i < sentenceDoms.length; i++){
        // Declared locally: without `var` this would create an implicit global
        // (and throw a ReferenceError in strict mode).
        var sentenceDom = sentenceDoms[i];
        if(show){
            sentenceDom.style.backgroundColor = getColor($(sentenceDom).data("score"));
        }
        else{
            sentenceDom.style.backgroundColor = "transparent";
        }
    }
}
/**
 * Recalculates the combined score (jQuery data entry "score") of each span
 * from its stored individual scores, using the current influence values.
 * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
 * @memberof content.js
 */
function updateScores(sentenceDoms){
    for(var i = 0; i < sentenceDoms.length; i++){
        // Wrapped once and kept local: assigning to an undeclared variable
        // would create an implicit global (a ReferenceError in strict mode).
        var $sentence = $(sentenceDoms[i]);
        $sentence.data("score", calcCombinedScore(
            $sentence.data('sentence-length-score'),
            $sentence.data('average-word-length-score'),
            $sentence.data('ntv-ratio-score'),
            $sentence.data('sentence-complexity-score'),
            $sentence.data('word-complexity-score')
        ));
    }
}
/**
 * Switches the tooltips of all sentences on or off by setting the inline
 * display style of every element matching ".sentence span.tooltip".
 * @param {Boolean} show - truthy to display the tooltips ("block"), falsy to hide them ("none")
 * @memberof content.js
 */
function showTooltip(show){
    var displayValue = show ? "block" : "none";
    $(".sentence span.tooltip").each(function(index, tooltip) {
        tooltip.style.display = displayValue;
    });
}
/**
 * Loads the word list containing the 1000 most common words from the
 * extension file 'most-common-words_1000.txt' (one word per line).
 * This function must be called before trying to use {annotate()}.
 * The file is fetched synchronously so that the list is complete when this
 * function returns. Entries are trimmed and lower-cased, matching the
 * lower-cased lookup in annotate(); blank lines are skipped.
 * @returns {Object} - lookup table mapping each word to true. Empty if the file could not be loaded.
 * @memberof content.js
 */
function loadWordList(){
    var wordListArray = [];
    // The synchronous mode is requested for this single call only, instead of
    // temporarily switching the global jQuery.ajaxSetup() defaults.
    $.ajax({
        // chrome.runtime.getURL replaces the deprecated chrome.extension.getURL.
        url: chrome.runtime.getURL('most-common-words_1000.txt'),
        async: false,
        dataType: 'text',
        success: function(data){
            wordListArray = data.split(/[\r\n]+/);
        }
    });
    var words = {};
    for(var i = 0; i < wordListArray.length; i++){
        var word = wordListArray[i].trim().toLowerCase();
        // A trailing newline yields an empty entry, which must not count as a word.
        if(word.length !== 0){
            words[word] = true;
        }
    }
    return words;
}
// Start-up sequence. The order matters: annotate() looks words up in wordList,
// and addToolTipMouseCallback() collects the tooltip elements which annotate()
// has appended to the sentences.
wordList = loadWordList();
var sentenceDoms = annotate();
addToolTipMouseCallback();
/**
 * Listener function for Chrome messaging system. The meaning of the parameters is documented by Google Chrome's extension documentation.
 *
 * Handled values of request.action:
 * - 'update-score': takes over the five influence values from the request and recalculates all combined scores
 * - 'show-tooltip': accepted, but currently without effect
 * - 'enable' / 'disable': switches the sentence coloring on or off
 * - 'enable-tooltip' / 'disable-tooltip': switches the tooltips on or off
 * - 'get-values': answers via callback with [enabled, tooltipEnabled, sentenceLengthInfluence,
 *   averageWordLengthInfluence, ntvRatioInfluence, sentenceComplexityInfluence, wordComplexityInfluence]
 * Any other action is ignored.
 * @param {Object} request - the message; request.action selects the operation
 * @param {Object} sender - the sender of the message (unused)
 * @param {Function} callback - response function, only called for 'get-values'
 * @memberof content.js
 **/
function listener(request, sender, callback){
    switch(request.action){
        case 'update-score':
            averageWordLengthInfluence = parseFloat(request.wordLengthInfluence);
            sentenceLengthInfluence = parseFloat(request.sentenceLengthInfluence);
            ntvRatioInfluence = parseFloat(request.ntvRatioInfluence);
            sentenceComplexityInfluence = parseFloat(request.sentenceComplexityInfluence);
            wordComplexityInfluence = parseFloat(request.wordComplexityInfluence);
            updateScores(sentenceDoms);
            break;
        case 'show-tooltip':
            // Intentionally a no-op; kept so the action remains a known one.
            break;
        case 'enable':
            showColors(sentenceDoms, true);
            enabled = true;
            if(tooltipEnabled){
                showTooltip(true);
            }
            break;
        case 'disable':
            showColors(sentenceDoms, false);
            enabled = false;
            // NOTE(review): this hides the tooltips only when they are already
            // disabled; the condition looks inverted compared to 'enable'.
            // Kept as is -- confirm the intended behavior.
            if(!tooltipEnabled){
                showTooltip(false);
            }
            break;
        case 'enable-tooltip':
            showTooltip(true);
            tooltipEnabled = true;
            break;
        case 'disable-tooltip':
            showTooltip(false);
            tooltipEnabled = false;
            break;
        // This used to be `request.action = 'get-values'` (an assignment), which
        // answered every unknown action and overwrote the request's action.
        case 'get-values':
            callback([enabled, tooltipEnabled, sentenceLengthInfluence, averageWordLengthInfluence, ntvRatioInfluence, sentenceComplexityInfluence, wordComplexityInfluence]);
            break;
    }
}
// Register listener() so that it receives the messages delivered through
// chrome.runtime.onMessage.
chrome.runtime.onMessage.addListener(listener);