Source: content.js

/**
 * @file A content script that is injected into the page. Communicates with
 * the extension via message passing. This file also works as the plugin storage for UI element settings.
 * @author Ulrik Schremser
 */

// Dictionary of common words (word -> true). Stays undefined until the
// start-up code assigns the result of loadWordList().
var wordList = void 0;

// Every metric below has an "Easy" and a "Hard" reference value. annotate()
// hands them to map() as the best and the worst value of that metric.

// Sentence length, counted in whitespace-separated words.
var averageSentenceLengthEasy = 12.0,
    averageSentenceLengthHard = 42.0;

/*
Reference for the word-length values:
@book{biber1998corpus,
  title={Corpus linguistics: Investigating language structure and use},
  author={Biber, Douglas and Conrad, Susan and Reppen, Randi},
  year={1998},
  publisher={Cambridge University Press},
  pages={68}
}
*/
// Average number of alphanumeric characters per word.
var averageAverageWordLengthEasy = 3.8,
    averageAverageWordLengthHard = 8.2;

// Number of clause splitters (commas, relative pronouns, ...) per word.
var averageSentenceComplexityEasy = 0.05,
    averageSentenceComplexityHard = 0.15;

// Share of words found in wordList. Here the larger value is the easy one,
// so map() treats this pair as inverted.
var averageWordComplexityEasy = 0.6,
    averageWordComplexityHard = 0.2;

// Nouns per verb.
var averageNtvRatioEasy = 1.2,
    averageNtvRatioHard = 2.2;

// Weights of the single scores inside calcCombinedScore(), which divides
// them by their sum. The message listener overwrites them on 'update-score'.
var sentenceLengthInfluence = 3500,
    averageWordLengthInfluence = 1200,
    ntvRatioInfluence = 800,
    sentenceComplexityInfluence = 2000,
    wordComplexityInfluence = 2500;

// UI state, switched by the message listener and reported on 'get-values'.
var enabled = false,
    tooltipEnabled = false;

// Fallback for engines without a native String.prototype.endsWith. A native
// implementation is never replaced, because it also honours the optional
// end-position argument that the previous unconditional override ignored.
if (typeof String.prototype.endsWith !== 'function') {
    /**
     * Tests whether the string ends with the given suffix.
     * @param {String} suffix - The characters expected at the end.
     * @param {Number} [endPosition] - Treat the string as if it were only this long. Defaults to the full length.
     * @returns {Boolean} - true if the (possibly shortened) string ends with suffix.
     */
    String.prototype.endsWith = function(suffix, endPosition) {
        var text = String(this);
        var needle = String(suffix);
        var end = text.length;
        if (endPosition !== undefined) {
            // Clamp into [0, length]; anything that is not a number counts as 0.
            end = Math.min(Math.max(Math.floor(endPosition) || 0, 0), text.length);
        }
        var start = end - needle.length;
        return start >= 0 && text.substring(start, end) === needle;
    };
}

// Fallback for engines that ship no native String.prototype.startsWith.
if (typeof String.prototype.startsWith !== 'function') {
  /**
   * Tests whether the string begins with the given prefix.
   * @param {String} str - The characters expected at the beginning.
   * @returns {Boolean} - true if str occurs at index 0.
   */
  String.prototype.startsWith = function (str) {
    // Searching backwards from index 0 can only ever report a match at index 0.
    var matchIndex = this.lastIndexOf(str, 0);
    return matchIndex === 0;
  };
}

/**
 * Like indexOf(), but searches for the first match of a regular expression.
 * @param {RegExp} regex - The pattern to look for.
 * @param {Number} [startpos] - Index at which the search begins. Defaults to 0.
 * @returns {Number} - Index of the first match relative to the whole string, or -1 if there is none.
 */
String.prototype.regexIndexOf = function(regex, startpos) {
    var offset = startpos || 0;
    var relativeIndex = this.substring(offset).search(regex);

    if (relativeIndex < 0) {
        return relativeIndex;
    }

    // search() ran on the tail only, so shift the hit back into the full string.
    return relativeIndex + offset;
};


/**
 * Determines the sentences contained in a DOM node by simple means: every
 * '.', '?' or '!' inside a direct text child ends a sentence. The children of
 * nodeRoot are moved into newly created <span class="sentence"> wrappers and the
 * list of these wrappers is returned. Text nodes get split if necessary.
 *
 * Side effects on nodeRoot: its children are replaced by the wrapper spans. An
 * empty text node is left behind after a wrapper when a text node ends exactly
 * on a sentence terminator. Content after the last terminator forms a final,
 * unterminated wrapper.
 *
 * @param {Node} nodeRoot - the root node in which to look for sentences. It has to contain only complete sentences.
 * @returns {Array} - Returns an array of spans which wrap the Nodes of sentences.
 * @memberof content.js
 */
function retrieveRawSentenceDoms(nodeRoot) {

    var sentenceEnd = /[.?!]/;
    var rawObjectSentenceDoms = [];

    // Wrapper of the sentence that has not seen its terminator yet,
    // or null when the next node has to open a new sentence.
    var openSpan = null;

    // Moves child from nodeRoot into the open sentence wrapper. If no sentence
    // is open, a new wrapper is created at the position of child first.
    function moveIntoSentence(child) {
        if (openSpan === null) {
            openSpan = document.createElement("span");
            openSpan.className = "sentence";
            rawObjectSentenceDoms.push(openSpan);
            nodeRoot.insertBefore(openSpan, child);
        }

        nodeRoot.removeChild(child);
        openSpan.appendChild(child);
        return openSpan;
    }

    var node = nodeRoot.firstChild;
    while (node) {

        // Non-empty text nodes need special care: they might consist of
        // multiple sentences.
        if (node.nodeType === 3 && node.nodeValue.length !== 0) {

            // Cut off one complete sentence per terminator found.
            var endIndex = node.nodeValue.search(sentenceEnd);
            while (endIndex >= 0) {
                var remainder = node.splitText(endIndex + 1);

                moveIntoSentence(node);
                openSpan = null;            // the terminator closes this sentence

                node = remainder;
                endIndex = node.nodeValue.search(sentenceEnd);
            }

            // Whatever text is left starts (or continues) an unfinished sentence.
            // Continue behind the wrapper, because the node itself has left nodeRoot.
            if (node.nodeValue.length !== 0) {
                node = moveIntoSentence(node);
            }
        }
        // Everything else (elements, empty text nodes, ...) is moved as a whole.
        else {
            node = moveIntoSentence(node);
        }

        node = node.nextSibling;
    }

    return rawObjectSentenceDoms;
}

/**
 * Postprocessing for retrieveRawSentenceDoms(). Glues wrappers that were cut
 * off too early back onto the sentence in front of them:
 *  - leading SUP elements are moved to the end of the previous sentence,
 *  - a wrapper whose text does not begin with a space followed by an upper-case
 *    letter or a digit is emptied into the previous sentence and removed from
 *    rootNode.
 * The first wrapper is always kept as it is.
 * @param {Array} rawObjectSentenceDoms - The output of retrieveRawSentenceDoms().
 * @param {Node} rootNode - the root node in which the elements of rawObjectSentenceDoms lie
 * @returns {Array} - merged sentences. Same format as rawObjectSentenceDoms.
 * @memberof content.js
 */
function mergeRawSentenceDoms(rawObjectSentenceDoms, rootNode){

    var startsLikeSentence = /^ [A-Z0-9].*/;
    var merged = [];

    for(var index = 0; index < rawObjectSentenceDoms.length; index++){

        var span = rawObjectSentenceDoms[index];
        var spanText = $(span).text();

        var hasPredecessor = (index != 0);
        // Last sentence accepted so far; undefined only for the very first wrapper.
        var predecessor = merged[merged.length - 1];

        // A SUP at the very front belongs to the sentence before it and
        // would otherwise break the start-of-sentence check below.
        while(span.childNodes.length >= 1 && span.childNodes[0].nodeName === "SUP" && hasPredecessor){
            var leadingSup = span.childNodes[0];
            span.removeChild(leadingSup);
            predecessor.appendChild(leadingSup);
            spanText = $(span).text();
        }

        // Does not look like the beginning of a sentence although there is one
        // in front of it: hand all content over to the previous sentence.
        if(!startsLikeSentence.test(spanText) && hasPredecessor){

            while(span.childNodes.length != 0){
                var fragment = span.childNodes[0];
                span.removeChild(fragment);
                predecessor.appendChild(fragment);
            }

            rootNode.removeChild(span);
            continue;
        }

        merged.push(span);
    }

    return merged;
}


/**
 * Builds the markup of the tooltip that is appended to a sentence: a table with
 * one column per metric, an icon row and a row of values whose background is
 * coloured according to the score of the metric.
 * @param {Array} metrics - One object per column: icon (file name inside the extension),
 *                          score (passed to getColor()) and label (the text shown).
 * @returns {String} - The HTML of the tooltip span.
 * @memberof content.js
 */
function buildSentenceTooltipHtml(metrics){

    var columns = '';
    var icons = '';
    var values = '';

    for(var m = 0; m < metrics.length; m++){
        columns += '<col width="60px" />';
        // chrome.runtime.getURL replaces the deprecated chrome.extension.getURL.
        icons += '<th><img src="' + chrome.runtime.getURL(metrics[m].icon) + '"/></th>';
        values += '<td height="40px" style="background-color:' +
                  getColor(metrics[m].score) + ';">' +
                      metrics[m].label +
                  '</td>';
    }

    return '<span class="tooltip"><table>' +
               columns +
           '<tr> ' + icons + '</tr>' +
           '<tr>' + values + '</tr>' +
           '</table></span>';
}

/**
 * Calculates all metrics and injects the tooltip-source code.
 *
 * Every p, li and dd element below #mw-content-text (except those of the index
 * and of the reference lists) is split into sentence spans. For each sentence
 * the raw value and the score of every metric as well as the combined score are
 * stored on the span via jQuery data():
 * 'plaintext', 'sentence-length', 'average-word-length', 'ntv-ratio',
 * 'sentence-complexity', 'word-complexity', each with a '-score' companion
 * (except 'plaintext'), and 'score'.
 *
 * The global wordList must have been loaded before this function is called.
 *
 * @returns {Array} - The fully annotated sentences as DOM in wrapper span-elements in an Array.
 * @memberof content.js
 */
function annotate(){

    var sentenceDoms = [];

    $('#mw-content-text p, #mw-content-text li, #mw-content-text dd')
        .not("#toc p, #toc li, #toc dd")                                    // Exclude index
        .not("div.reflist p, div.reflist li, div.reflist dd")               // Exclude references
        .each(function() {
            var rawObjectSentenceDoms = retrieveRawSentenceDoms(this);
            var mergedObjectSentenceDoms = mergeRawSentenceDoms(rawObjectSentenceDoms, this);

            for(var m = 0; m < mergedObjectSentenceDoms.length; m++){
                sentenceDoms.push(mergedObjectSentenceDoms[m]);
            }
        });

    for(var i = 0; i < sentenceDoms.length; i++){

        var $sentence = $(sentenceDoms[i]);

        // PLAINTEXT (bracketed numbers such as "[12]" are dropped)
        var plainText = $sentence.text().replace(/\[\d+\]/g, '').trim();
        $sentence.data('plaintext', plainText);

        // SENTENCE-LENGTH IN WORDS
        // Note: split() yields at least one element, so this is never 0.
        var sentenceLength = plainText.split(/\s+/).length;
        var sentenceLengthScore = map(averageSentenceLengthEasy, sentenceLength, averageSentenceLengthHard);
        $sentence.data('sentence-length', sentenceLength);
        $sentence.data('sentence-length-score', sentenceLengthScore);

        // AVERAGE WORD LENGTH
        var wordLengthString = plainText.replace(/[^a-zA-Z0-9]/g, '').trim();
        var averageWordLength = wordLengthString.length / sentenceLength;
        var averageWordLengthScore = map(averageAverageWordLengthEasy, averageWordLength, averageAverageWordLengthHard);
        $sentence.data('average-word-length', averageWordLength);
        $sentence.data('average-word-length-score', averageWordLengthScore);

        // NOUN TO VERB RATIO (FOR NOMINAL FORMS)
        // Tag the sentence only once; the tiny lower bound avoids a division by
        // zero for sentences without any verb.
        var taggedText = nlp.pos(plainText);
        var ntvRatio = taggedText.nouns().length / Math.max(0.00000000000001, taggedText.verbs().length);
        var ntvRatioScore = map(averageNtvRatioEasy, ntvRatio, averageNtvRatioHard);
        $sentence.data('ntv-ratio', ntvRatio);
        $sentence.data('ntv-ratio-score', ntvRatioScore);

        // SENTENCE COMPLEXITY
        // Keep '-' while filtering, otherwise the "\s-\s" splitter below can never
        // match. The first and the last word are cut off before counting.
        var filteredPlainText = plainText
            .replace(/[^A-Za-z0-9,\s()\-]/g, '')
            .replace(/^\s*[A-Za-z0-9]+\s*/, '')
            .replace(/\s*[A-Za-z0-9]+\s*$/, '');

        var sentenceSplitters = filteredPlainText.match(/(who|which|that|whose|,|neither|\s-\s|\()/g);
        var sentenceComplexity = ((sentenceSplitters == null) ? 0 : sentenceSplitters.length) / sentenceLength;
        var sentenceComplexityScore = map(averageSentenceComplexityEasy, sentenceComplexity, averageSentenceComplexityHard);
        $sentence.data('sentence-complexity', sentenceComplexity);
        $sentence.data('sentence-complexity-score', sentenceComplexityScore);

        // WORD COMPLEXITY (share of words contained in the word list)
        var wordComplexityString = plainText.replace(/[^a-zA-Z0-9\s]/g, '').trim();
        var wordComplexityWords = wordComplexityString.split(/\s+/);
        var inDictCount = 0;
        for(var wi = 0; wi < wordComplexityWords.length; wi++){
            if(wordList[wordComplexityWords[wi].toLowerCase()] === true){
                inDictCount++;
            }
        }
        var wordComplexity = inDictCount / sentenceLength;
        var wordComplexityScore = map(averageWordComplexityEasy, wordComplexity, averageWordComplexityHard);
        $sentence.data('word-complexity', wordComplexity);
        $sentence.data('word-complexity-score', wordComplexityScore);

        // APPEND MOUSEOVER
        // Appended after the plain text was read, so the tooltip never becomes
        // part of the analysed text.
        $sentence.append(buildSentenceTooltipHtml([
            { icon: 'line-length.png',          score: sentenceLengthScore,     label: sentenceLength },
            { icon: 'word-length.png',          score: averageWordLengthScore,  label: averageWordLength.toFixed(2) },
            { icon: 'nominal-forms.png',        score: ntvRatioScore,           label: ntvRatio.toFixed(2) },
            { icon: 'structure-complexity.png', score: sentenceComplexityScore, label: sentenceComplexity.toFixed(2) },
            { icon: 'word-complexity.png',      score: wordComplexityScore,     label: wordComplexity.toFixed(2) }
        ]));

        // CALCULATE COMBINED SCORE AND SET IT
        var score = calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore);
        $sentence.data('score', score);
    }

    return sentenceDoms;
}

/**
 * Calculates the combined score of the given scores as their weighted average.
 * The weights are the global *Influence variables, which must already exist.
 * If the weights add up to 0 (or to something that is not a finite number),
 * a weighted average is undefined; the plain mean of the five scores is
 * returned instead of NaN in that case.
 * @param {Number} sentenceLengthScore - Score of the sentence length.
 * @param {Number} averageWordLengthScore - Score of the average word length.
 * @param {Number} ntvRatioScore - Score of the noun to verb ratio.
 * @param {Number} sentenceComplexityScore - Score of the sentence complexity.
 * @param {Number} wordComplexityScore - Score of the word complexity.
 * @returns {Number} - The combined score.
 * @memberof content.js
 */
function calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore){

    var exactSum = averageWordLengthInfluence + sentenceLengthInfluence + ntvRatioInfluence + sentenceComplexityInfluence + wordComplexityInfluence;

    // E.g. every weight set to 0: fall back to equal weights.
    if(exactSum === 0 || !isFinite(exactSum)){
        return (sentenceLengthScore + averageWordLengthScore + ntvRatioScore + sentenceComplexityScore + wordComplexityScore) / 5;
    }

    return averageWordLengthScore * averageWordLengthInfluence / exactSum +
           sentenceLengthScore * sentenceLengthInfluence / exactSum +
           sentenceComplexityScore * sentenceComplexityInfluence / exactSum +
           wordComplexityScore * wordComplexityInfluence / exactSum +
           ntvRatioScore * ntvRatioInfluence / exactSum;
}

/**
 * Maps val linearly onto the range 0.0 (worst) to 1.0 (best), where best and
 * worst are the values of the metric that receive these scores. Values outside
 * of the range spanned by best and worst are clamped. best may be larger than
 * worst.
 * @param {Number} best - Value of the metric considered best.
 * @param {Number} val - The value to map.
 * @param {Number} worst - Value of the metric considered worst.
 * @returns {Number} - The score between 0.0 and 1.0.
 * @memberof content.js
 */
function map(best, val, worst){

    // "Inverted" means that larger values are the better ones.
    var inverted = worst < best;
    var lower = inverted ? worst : best;
    var upper = inverted ? best : worst;

    if(val < lower){
        return inverted ? 0.0 : 1.0;
    }

    if(val > upper){
        return inverted ? 1.0 : 0.0;
    }

    // Position of val between the bounds: 0 at lower, 1 at upper.
    var fraction = (val - lower) / (upper - lower);

    return inverted ? fraction : (1 - fraction);
}

/**
 * Calculates a color string for a given value.
 * The colour is interpolated from a light red (0.0) over white (0.5) to a light
 * blue (1.0). Values outside of this range are clamped; a value that is not a
 * number is treated as the neutral 0.5, so the result is always a valid
 * "#rrggbb" string.
 * @param {Float} value - Value between 0.0 and 1.0
 * @returns {String} - The color string in the form "#rrggbb".
 * @memberof content.js
 */
function getColor(value){

    var position = Number(value);
    if(isNaN(position)){
        position = 0.5;
    }
    else if(position < 0){
        position = 0;
    }
    else if(position > 1){
        position = 1;
    }

    // Colour stops as [r, g, b].
    var colorAt0 = [255, 255/2, 255/2];
    var colorAt5 = [255, 255, 255];
    var colorAt1 = [107, 174, 212];

    var from, to, scale;

    // Interpolate between 0.0 and 0.5
    if(position <= 0.5){
        from = colorAt0;
        to = colorAt5;
        scale = position * 2;
    }
    // Interpolate between 0.5 and 1.0
    else{
        from = colorAt5;
        to = colorAt1;
        scale = (position - 0.5) * 2;
    }

    var color = '#';
    for(var channel = 0; channel < 3; channel++){
        var hex = Math.round((1 - scale) * from[channel] + scale * to[channel]).toString(16);
        // Every channel needs exactly two hex digits.
        color += (hex.length < 2) ? ('0' + hex) : hex;
    }

    return color;
}




/**
 * Installs the window's onmousemove handler, which places every element of
 * class "tooltip" 20px to the right of and 20px below the mouse pointer.
 * Only the tooltips that exist at the time of this call are tracked.
 * @memberof content.js
 */
function addToolTipMouseCallback(){

    var tooltipNodes = document.querySelectorAll('.tooltip');

    function followPointer(event){

        var left = (event.clientX + 20) + 'px';
        var below = (event.clientY + 20) + 'px';

        Array.prototype.forEach.call(tooltipNodes, function(tooltip){
            tooltip.style.top = below;
            tooltip.style.left = left;
        });
    }

    window.onmousemove = followPointer;
}

/**
 * Sets the background color of each sentence span according to its stored
 * 'score', or removes the coloring.
 * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
 * @param {Boolean} show - Should the spans get the score as background color? If not, background-color will be set to "transparent".
 * @memberof content.js
 */
function showColors(sentenceDoms, show){

    for(var i = 0; i < sentenceDoms.length; i++){

        // Declared locally; it used to leak into the global scope.
        var sentenceDom = sentenceDoms[i];

        sentenceDom.style.backgroundColor = show
            ? getColor($(sentenceDom).data("score"))
            : "transparent";
    }
}

/**
 * Recalculates the combined score of each span from the single scores stored
 * on it and stores the result as 'score'. The colors are not touched.
 * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
 * @memberof content.js
 */
function updateScores(sentenceDoms){

    for(var i = 0; i < sentenceDoms.length; i++){

        // Declared locally; it used to leak into the global scope.
        var $sentence = $(sentenceDoms[i]);

        $sentence.data("score", calcCombinedScore(
            $sentence.data('sentence-length-score'),
            $sentence.data('average-word-length-score'),
            $sentence.data('ntv-ratio-score'),
            $sentence.data('sentence-complexity-score'),
            $sentence.data('word-complexity-score')
        ));
    }
}


/**
 * Enables or disables the tooltips for the sentences selected by .sentence span.tooltip
 * by setting their inline display style to "block" or "none".
 * @param {Boolean} show - Whether the tooltips are to be displayed.
 * @memberof content.js
 */
function showTooltip(show){

    var displayMode = show ? "block" : "none";

    $(".sentence span.tooltip").each(function() {
        this.style.display = displayMode;
    });
}

/**
 * Loads the word list containing the 1000 most common words from the file
 * 'most-common-words_1000.txt' of the extension (one word per line).
 * This function must be called before trying to use {annotate()}.
 *
 * The file is fetched synchronously, because the result is returned directly.
 * The synchronous mode is requested for this single request only, so jQuery's
 * global ajax settings are never changed. Words are trimmed and lower-cased
 * (annotate() looks words up in lower case) and blank lines are ignored.
 *
 * @returns {Object} - Maps every word of the list to true. Empty if the file could not be loaded.
 * @memberof content.js
 */
function loadWordList(){

    var wordList = {};

    $.ajax({
        // chrome.runtime.getURL replaces the deprecated chrome.extension.getURL.
        url: chrome.runtime.getURL('most-common-words_1000.txt'),
        async: false,
        dataType: 'text',
        success: function(data){
            var lines = data.split(/[\r\n]+/);

            for(var i = 0; i < lines.length; i++){
                var word = lines[i].trim().toLowerCase();
                if(word.length !== 0){
                    wordList[word] = true;
                }
            }
        },
        error: function(){
            // Keep going with an empty list, but leave a trace of the failure.
            console.warn('content.js: could not load most-common-words_1000.txt');
        }
    });

    return wordList;
}




// Start-up sequence, executed once when the script is loaded. The order matters:
// annotate() reads the global wordList, and addToolTipMouseCallback() only
// tracks the tooltips that annotate() has already added to the page.
wordList = loadWordList();
// Kept globally: the message listener colors and re-scores these spans later.
var sentenceDoms = annotate();
addToolTipMouseCallback();


/**
 * Listener function for Chrome messaging system. The meaning of the parameters is documented by Google Chrome's extension documentation.
 *
 * Handled values of request.action:
 *  - 'update-score': takes over the influence weights from the request (fields
 *    wordLengthInfluence, sentenceLengthInfluence, ntvRatioInfluence,
 *    sentenceComplexityInfluence, wordComplexityInfluence), recalculates the
 *    scores and, if coloring is enabled, refreshes the colors. A field that does
 *    not parse to a finite number leaves the respective weight unchanged.
 *  - 'show-tooltip': accepted, does nothing.
 *  - 'enable' / 'disable': switches the coloring of the sentences.
 *  - 'enable-tooltip' / 'disable-tooltip': switches the tooltips.
 *  - 'get-values': replies via callback with
 *    [enabled, tooltipEnabled, sentenceLengthInfluence, averageWordLengthInfluence,
 *     ntvRatioInfluence, sentenceComplexityInfluence, wordComplexityInfluence].
 * Any other action is ignored.
 *
 * @param {Object} request - The message; request.action selects the operation.
 * @param {Object} sender - The sender of the message (unused).
 * @param {Function} callback - Used to send the reply for 'get-values'.
 * @memberof content.js
 **/
function listener(request, sender, callback){

    // Parses a weight from the message; keeps the current weight if the
    // message carries no usable number.
    function toInfluence(raw, current){
        var parsed = parseFloat(raw);
        return isFinite(parsed) ? parsed : current;
    }

    switch(request.action){

        case 'update-score':
            averageWordLengthInfluence = toInfluence(request.wordLengthInfluence, averageWordLengthInfluence);
            sentenceLengthInfluence = toInfluence(request.sentenceLengthInfluence, sentenceLengthInfluence);
            ntvRatioInfluence = toInfluence(request.ntvRatioInfluence, ntvRatioInfluence);
            sentenceComplexityInfluence = toInfluence(request.sentenceComplexityInfluence, sentenceComplexityInfluence);
            wordComplexityInfluence = toInfluence(request.wordComplexityInfluence, wordComplexityInfluence);
            updateScores(sentenceDoms);

            // The colors on the page would otherwise still show the old scores.
            if(enabled){
                showColors(sentenceDoms, true);
            }
            break;

        case 'show-tooltip':
            // Intentionally empty.
            break;

        case 'enable':
            showColors(sentenceDoms, true);
            enabled = true;

            if(tooltipEnabled){
                showTooltip(true);
            }
            break;

        case 'disable':
            showColors(sentenceDoms, false);
            enabled = false;

            // NOTE(review): this hides the tooltips only while they are switched
            // off already - confirm that this condition is intended.
            if(!tooltipEnabled){
                showTooltip(false);
            }
            break;

        case 'enable-tooltip':
            showTooltip(true);
            tooltipEnabled = true;
            break;

        case 'disable-tooltip':
            showTooltip(false);
            tooltipEnabled = false;
            break;

        // This used to be an assignment (=) instead of a comparison, so every
        // unknown action was answered like 'get-values'.
        case 'get-values':
            callback([enabled, tooltipEnabled, sentenceLengthInfluence, averageWordLengthInfluence, ntvRatioInfluence, sentenceComplexityInfluence, wordComplexityInfluence]);
            break;
    }
}

// Register listener() with the extension messaging system, so that every
// message sent to this content script is passed to it.
chrome.runtime.onMessage.addListener(listener);