Source: content.js

  1. /**
  2. * @file A content script which is injected into the page. Communicates with
  3. * the extension via message passing. This file also works as the plugin storage for UI element settings.
  4. * @author Ulrik Schremser
  5. */
  // Lookup table of common words (word -> true). It stays undefined until the
  // result of loadWordList() is assigned to it.
  var wordList;

  // Reference points used to turn a raw metric into a score: annotate() passes
  // the "Easy" value as the best end of the scale and the "Hard" value as the
  // worst end when it calls map().
  var averageSentenceLengthEasy = 12.0;
  var averageSentenceLengthHard = 42.0;
  /*
  @book{biber1998corpus,
  title={Corpus linguistics: Investigating language structure and use},
  author={Biber, Douglas and Conrad, Susan and Reppen, Randi},
  year={1998},
  publisher={Cambridge University Press},
  pages={68}
  }
  */
  var averageAverageWordLengthEasy = 3.8;
  var averageAverageWordLengthHard = 8.2;
  var averageSentenceComplexityEasy = 0.05;
  var averageSentenceComplexityHard = 0.15;
  // For word complexity the scale is reversed: the larger value is the easy end.
  var averageWordComplexityEasy = 0.6;
  var averageWordComplexityHard = 0.2;
  var averageNtvRatioEasy = 1.2;
  var averageNtvRatioHard = 2.2;

  // Relative weights of the individual scores in the combined score. They are
  // normalised by their sum in calcCombinedScore() and overwritten by the
  // 'update-score' message.
  var sentenceLengthInfluence = 3500;
  var averageWordLengthInfluence = 1200;
  var ntvRatioInfluence = 800;
  var sentenceComplexityInfluence = 2000;
  var wordComplexityInfluence = 2500;

  // UI state as last requested by the extension.
  var enabled = false;
  var tooltipEnabled = false;
  // Fallback for engines without String.prototype.endsWith. It is installed only
  // when no native implementation exists, so the standard method (including its
  // optional end-position argument) is never overwritten.
  if (typeof String.prototype.endsWith != 'function') {
    String.prototype.endsWith = function(suffix) {
      var needle = String(suffix);
      var start = this.length - needle.length;
      // A suffix longer than the string can never match.
      return start >= 0 && this.indexOf(needle, start) === start;
    };
  }
  // Fallback for engines without String.prototype.startsWith.
  if (typeof String.prototype.startsWith != 'function') {
    String.prototype.startsWith = function (str){
      // lastIndexOf with a start position of 0 can only match at index 0, so the
      // rest of the string is never scanned.
      return this.lastIndexOf(str, 0) === 0;
    };
  }
  /**
   * Like String.prototype.indexOf, but searches for a regular expression.
   * The search runs on the substring beginning at startpos, so anchors such as
   * "^" refer to that substring.
   * @param {RegExp} regex - The pattern to look for.
   * @param {Number} [startpos=0] - Index at which the search starts. Values below 0
   * count as 0 and values beyond the end count as the string length.
   * @returns {Number} - Index of the first match at or after startpos, or -1 if there is none.
   */
  String.prototype.regexIndexOf = function(regex, startpos) {
    // Clamp the offset: substring() would silently treat a negative start as 0,
    // and adding the unclamped value back would then yield a wrong index.
    var offset = Math.min(this.length, Math.max(0, startpos || 0));
    var relativeIndex = this.substring(offset).search(regex);
    return (relativeIndex >= 0) ? (relativeIndex + offset) : relativeIndex;
  };
  /**
   * Determines the sentences contained in a DOM node by simple means, wraps them
   * into span elements of class "sentence" and returns the list of these spans.
   * Text nodes are split after every '.', '?' or '!' where necessary. Only the
   * direct children of nodeRoot are visited; a child that is not a non-empty
   * text node is moved into the current sentence as a whole.
   * Every one of the three characters ends a sentence here, so the result can
   * contain fragments (see mergeRawSentenceDoms for the postprocessing).
   * @param {Node} nodeRoot - the root node in which to look for sentences. It has to contain only complete sentences.
   * @returns {Array} - Returns an array of spans which wrap the Nodes of sentences.
   * @memberof content.js
   */
  function retrieveRawSentenceDoms(nodeRoot) {
    var sentenceEnd = /[.?!]/;
    var sentenceStarted = false;
    var spanToUse;
    var rawObjectSentenceDoms = [];

    // Moves `child` out of nodeRoot into the span of the current sentence. If no
    // sentence is open, a new span is created at the position of `child` first.
    function addToSentence(child) {
      if (!sentenceStarted) {
        spanToUse = document.createElement("span");
        spanToUse.className = "sentence";
        rawObjectSentenceDoms.push(spanToUse);
        nodeRoot.insertBefore(spanToUse, child);
        sentenceStarted = true;
      }
      nodeRoot.removeChild(child);
      spanToUse.appendChild(child);
    }

    var node = nodeRoot.firstChild;
    while (node) {
      // Text nodes need special care: they might consist of multiple sentences.
      if (node.nodeType == 3 && node.nodeValue.length != 0) {
        // Cut off and store every sentence that ends inside this text node.
        for (var index = node.nodeValue.search(sentenceEnd); index >= 0; index = node.nodeValue.search(sentenceEnd)) {
          // splitText leaves the text up to the sentence sign in `node` and
          // returns the rest as a new sibling.
          var remainder = node.splitText(index + 1);
          addToSentence(node);
          sentenceStarted = false;
          node = remainder;
        }
        // Whatever is left starts (or continues) a sentence that is not finished yet.
        if (node.nodeValue.length != 0) {
          addToSentence(node);
          // Continue after the span, which now sits where the node used to be.
          node = spanToUse;
        }
      }
      // Node is NOT a (non-empty) text node.
      else {
        addToSentence(node);
        node = spanToUse;
      }
      node = node.nextSibling;
    }
    // TODO: Force ending of sentence if no sentence sign was found.
    return rawObjectSentenceDoms;
  }
  /**
   * Postprocessing for retrieveRawSentenceDoms. Glues fragments that were split
   * off by mistake back onto the sentence in front of them.
   * A span is treated as a fragment when its text does not begin with a space
   * followed by an uppercase letter or a digit. Leading SUP elements are always
   * handed over to the preceding sentence. The very first span is kept as it is.
   * @param {Array} rawObjectSentenceDoms - The output of retrieveRawSentenceDoms().
   * @param {Node} rootNode - the root node in which the elements of rawObjectSentenceDoms lie
   * @returns {Array} - merged sentences. Same format as rawObjectSentenceDoms.
   * @memberof content.js
   */
  function mergeRawSentenceDoms(rawObjectSentenceDoms, rootNode){
    var mergedSentences = [];
    for (var position = 0; position < rawObjectSentenceDoms.length; position++) {
      var span = rawObjectSentenceDoms[position];

      // There is nothing in front of the first span it could belong to.
      if (position == 0) {
        mergedSentences.push(span);
        continue;
      }

      var previous = mergedSentences[mergedSentences.length - 1];

      // A SUP at the very beginning belongs to the previous sentence and would
      // mess up the start-of-sentence check below.
      while (span.childNodes.length >= 1 && span.childNodes[0].nodeName === "SUP") {
        var sup = span.childNodes[0];
        span.removeChild(sup);
        previous.appendChild(sup);
      }

      // Starts like a sentence should: keep it as a sentence of its own.
      if (/^ [A-Z0-9].*/.test($(span).text())) {
        mergedSentences.push(span);
        continue;
      }

      // Otherwise the whole content belongs to the previous sentence; the span
      // that is left empty gets removed from the document.
      while (span.childNodes.length != 0) {
        var child = span.childNodes[0];
        span.removeChild(child);
        previous.appendChild(child);
      }
      rootNode.removeChild(span);
    }
    return mergedSentences;
  }
  /**
   * Calculates all metrics and injects the tooltip-source code.
   * Sentences are collected from the p, li and dd elements below #mw-content-text
   * (without the table of contents and the reference lists). For every sentence
   * the raw metric, its score and the combined score are stored with jQuery's
   * data() on the wrapper span, and a tooltip table showing the five metrics is
   * appended to the span.
   * Requires wordList to be loaded and relies on map(), getColor() and
   * calcCombinedScore() from this file as well as on the global nlp object.
   * @returns {Array} - The fully annotated sentences as DOM in wrapper span-elements in an Array.
   * @memberof content.js
   */
  function annotate(){
    var sentenceDoms = [];
    $('#mw-content-text p, #mw-content-text li, #mw-content-text dd')
      .not("#toc p, #toc li, #toc dd") // Exclude index
      .not("div.reflist p, div.reflist li, div.reflist dd") // Exclude references
      .each(function() {
        var rawObjectSentenceDoms = retrieveRawSentenceDoms(this);
        var mergedObjectSentenceDoms = mergeRawSentenceDoms(rawObjectSentenceDoms, this);
        for (var m = 0; m < mergedObjectSentenceDoms.length; m++) {
          sentenceDoms.push(mergedObjectSentenceDoms[m]);
        }
      });

    // The upper part of the tooltip is the same for every sentence, so it is
    // assembled only once. chrome.runtime.getURL replaces the deprecated
    // chrome.extension.getURL.
    var iconFiles = ['line-length.png', 'word-length.png', 'nominal-forms.png', 'structure-complexity.png', 'word-complexity.png'];
    var tooltipStart = '<span class="tooltip"><table>';
    for (var c = 0; c < iconFiles.length; c++) {
      tooltipStart += '<col width="60px" />';
    }
    tooltipStart += '<tr> ';
    for (var f = 0; f < iconFiles.length; f++) {
      tooltipStart += '<th><img src="' + chrome.runtime.getURL(iconFiles[f]) + '"/></th>';
    }
    tooltipStart += '</tr>' + '<tr>';
    var tooltipEnd = '</tr>' + '</table></span>';

    // Builds one value cell of the tooltip, colored according to the score.
    function tooltipCell(score, text) {
      return '<td height="40px" style="background-color:' + getColor(score) + ';">' + text + '</td>';
    }

    for (var i = 0; i < sentenceDoms.length; i++) {
      var $sentence = $(sentenceDoms[i]);

      // PLAINTEXT (without reference markers such as "[12]")
      var plainText = $sentence.text().replace(/\[\d+\]/g, '').trim();
      $sentence.data('plaintext', plainText);

      // SENTENCE-LENGTH IN WORDS
      // split() yields at least one element, so the divisions below never divide by zero.
      var sentenceLength = plainText.split(/\s+/).length;
      var sentenceLengthScore = map(averageSentenceLengthEasy, sentenceLength, averageSentenceLengthHard);
      $sentence.data('sentence-length', sentenceLength);
      $sentence.data('sentence-length-score', sentenceLengthScore);

      // AVERAGE WORD LENGTH
      var wordLengthString = plainText.replace(/[^a-zA-Z0-9]/g, '').trim();
      var averageWordLength = wordLengthString.length / sentenceLength;
      var averageWordLengthScore = map(averageAverageWordLengthEasy, averageWordLength, averageAverageWordLengthHard);
      $sentence.data('average-word-length', averageWordLength);
      $sentence.data('average-word-length-score', averageWordLengthScore);

      // NOUN TO VERB RATIO (FOR NOMINAL FORMS)
      // The sentence is tagged only once; the tiny lower bound keeps the ratio
      // finite for sentences without a verb.
      var tagged = nlp.pos(plainText);
      var ntvRatio = tagged.nouns().length / Math.max(0.00000000000001, tagged.verbs().length);
      var ntvRatioScore = map(averageNtvRatioEasy, ntvRatio, averageNtvRatioHard);
      $sentence.data('ntv-ratio', ntvRatio);
      $sentence.data('ntv-ratio-score', ntvRatioScore);

      // SENTENCE COMPLEXITY
      // Counts clause markers, ignoring the first and the last word of the sentence.
      var filteredPlainText = plainText
        .replace(/[^A-Za-z0-9,\s()]/g, '')
        .replace(/^\s*[A-Za-z0-9]+\s*/, '')
        .replace(/\s*[A-Za-z0-9]+\s*$/, '');
      var sentenceSplitters = filteredPlainText.match(/(who|which|that|whose|,|neither|\s-\s|\()/g);
      var sentenceComplexity = ((sentenceSplitters == null) ? 0 : sentenceSplitters.length) / sentenceLength;
      var sentenceComplexityScore = map(averageSentenceComplexityEasy, sentenceComplexity, averageSentenceComplexityHard);
      $sentence.data('sentence-complexity', sentenceComplexity);
      $sentence.data('sentence-complexity-score', sentenceComplexityScore);

      // WORD COMPLEXITY (share of words that are contained in the word list)
      var wordComplexityString = plainText.replace(/[^a-zA-Z0-9\s]/g, '').trim();
      var wordComplexityWords = wordComplexityString.split(/\s+/);
      var inDictCount = 0;
      for (var wi = 0; wi < wordComplexityWords.length; wi++) {
        if (wordList[wordComplexityWords[wi].toLowerCase()] === true) {
          inDictCount++;
        }
      }
      var wordComplexity = inDictCount / sentenceLength;
      var wordComplexityScore = map(averageWordComplexityEasy, wordComplexity, averageWordComplexityHard);
      $sentence.data('word-complexity', wordComplexity);
      $sentence.data('word-complexity-score', wordComplexityScore);

      // APPEND MOUSEOVER
      $sentence.append(tooltipStart +
        tooltipCell(sentenceLengthScore, sentenceLength) +
        tooltipCell(averageWordLengthScore, averageWordLength.toFixed(2)) +
        tooltipCell(ntvRatioScore, ntvRatio.toFixed(2)) +
        tooltipCell(sentenceComplexityScore, sentenceComplexity.toFixed(2)) +
        tooltipCell(wordComplexityScore, wordComplexity.toFixed(2)) +
        tooltipEnd);

      // CALCULATE COMBINED SCORE AND SET IT
      var score = calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore);
      $sentence.data('score', score);
    }
    return sentenceDoms;
  }
  /**
   * Combines the five individual scores into one weighted average. Each score
   * is weighted with its influence variable divided by the sum of all influence
   * variables, so these variables must already exist.
   * @param {Number} sentenceLengthScore - Score of the sentence length.
   * @param {Number} averageWordLengthScore - Score of the average word length.
   * @param {Number} ntvRatioScore - Score of the noun to verb ratio.
   * @param {Number} sentenceComplexityScore - Score of the sentence complexity.
   * @param {Number} wordComplexityScore - Score of the word complexity.
   * @returns {Number} - The weighted average of the given scores.
   * @memberof content.js
   */
  function calcCombinedScore(sentenceLengthScore, averageWordLengthScore, ntvRatioScore, sentenceComplexityScore, wordComplexityScore){
    var totalInfluence = averageWordLengthInfluence + sentenceLengthInfluence + ntvRatioInfluence + sentenceComplexityInfluence + wordComplexityInfluence;
    var weightedParts = [
      averageWordLengthScore * averageWordLengthInfluence / totalInfluence,
      sentenceLengthScore * sentenceLengthInfluence / totalInfluence,
      sentenceComplexityScore * sentenceComplexityInfluence / totalInfluence,
      wordComplexityScore * wordComplexityInfluence / totalInfluence,
      ntvRatioScore * ntvRatioInfluence / totalInfluence
    ];
    var combined = weightedParts[0];
    for (var part = 1; part < weightedParts.length; part++) {
      combined += weightedParts[part];
    }
    return combined;
  }
  /**
   * Maps the value between 0.0 (worst) and 1.0 (best). Clamped, if the value is out of this range.
   * The scale may run in either direction: best can be smaller or larger than worst.
   * @param {Number} best - The metric value that is mapped to 1.0.
   * @param {Number} val - The metric value to map.
   * @param {Number} worst - The metric value that is mapped to 0.0.
   * @returns {Number} - The score between 0.0 and 1.0. If best and worst are equal
   * and val has the same value, 1.0 is returned.
   * @memberof content.js
   */
  function map(best, val, worst){
    // On an inverted scale the larger number is the best one.
    var inverted = worst < best;
    var low = inverted ? worst : best;
    var high = inverted ? best : worst;
    if (val < low) {
      return inverted ? 0.0 : 1.0;
    }
    if (val > high) {
      return inverted ? 1.0 : 0.0;
    }
    // Degenerate range: without this guard the interpolation would compute 0 / 0.
    if (high === low && val === low) {
      return 1.0;
    }
    var mappedValue = (val - low) / (high - low);
    return inverted ? mappedValue : (1 - mappedValue);
  }
  /**
   * Calculates a color string for a given value.
   * The color is interpolated linearly from a light red (0.0) over white (0.5)
   * to a light blue (1.0). Values outside of this range are clamped to it, so
   * the result is always a valid "#rrggbb" string for numeric input. NaN is not
   * a valid value and does not produce a usable color.
   * @param {Float} value - Value between 0.0 and 1.0
   * @returns {String} - The color string specifying a color between red (for 0.0) and blue (for 1.0).
   * @memberof content.js
   */
  function getColor(value){
    // Color stops as [red, green, blue].
    var atZero = [255, 255/2, 255/2];
    var atHalf = [255, 255, 255];
    var atOne = [107, 174, 212];

    // Extrapolating beyond the stops would yield channels outside of 0..255.
    var clamped = Math.min(1, Math.max(0, value));

    var from, to, scale;
    if (clamped <= 0.5) {
      // Interpolate between 0.0 and 0.5
      from = atZero;
      to = atHalf;
      scale = clamped * 2;
    }
    else {
      // Interpolate between 0.5 and 1.0
      from = atHalf;
      to = atOne;
      scale = (clamped - 0.5) * 2;
    }

    var color = '#';
    for (var channel = 0; channel < 3; channel++) {
      var mixed = (1 - scale) * from[channel] + scale * to[channel];
      // Every channel needs exactly two hex digits.
      color += ('0' + Math.round(mixed).toString(16)).slice(-2);
    }
    return color;
  }
  /**
   * Makes all elements of class "tooltip" follow the mouse: on every mouse move
   * their top and left style is set to the pointer position (in client
   * coordinates) plus an offset of 20 pixels.
   * The tooltip elements are collected once when this function is called. The
   * handler is registered with addEventListener, so a handler that is already
   * assigned to window.onmousemove is not replaced.
   * @memberof content.js
   */
  function addToolTipMouseCallback(){
    var tooltips = document.querySelectorAll('.tooltip');
    window.addEventListener('mousemove', function (e) {
      var x = (e.clientX + 20) + 'px';
      var y = (e.clientY + 20) + 'px';
      for (var i = 0; i < tooltips.length; i++) {
        tooltips[i].style.top = y;
        tooltips[i].style.left = x;
      }
    });
  }
  /**
   * Calculates and sets the color of each sentence span, or removes it.
   * The color is derived with getColor() from the "score" value stored on the span.
   * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
   * @param {Boolean} show - Should the spans get the score as background color? If not, background-color will be set to "transparent".
   * @memberof content.js
   */
  function showColors(sentenceDoms, show){
    for (var i = 0; i < sentenceDoms.length; i++) {
      // Declared locally; an undeclared assignment would create a global
      // variable (and throw in strict mode).
      var sentenceDom = sentenceDoms[i];
      if (show) {
        sentenceDom.style.backgroundColor = getColor($(sentenceDom).data("score"));
      }
      else {
        sentenceDom.style.backgroundColor = "transparent";
      }
    }
  }
  /**
   * Updates the combined score values for each span.
   * The individual scores stored on a span are combined again with
   * calcCombinedScore() and the result is stored as "score". Colors that are
   * already shown are not touched by this function.
   * @param {Array} sentenceDoms - The array of span-elements in which the DOM for each sentence lies
   * @memberof content.js
   */
  function updateScores(sentenceDoms){
    for (var i = 0; i < sentenceDoms.length; i++) {
      // Wrapped once and kept local; an undeclared assignment would create a
      // global variable (and throw in strict mode).
      var $sentence = $(sentenceDoms[i]);
      $sentence.data("score", calcCombinedScore(
        $sentence.data('sentence-length-score'),
        $sentence.data('average-word-length-score'),
        $sentence.data('ntv-ratio-score'),
        $sentence.data('sentence-complexity-score'),
        $sentence.data('word-complexity-score')
      ));
    }
  }
  /**
   * Shows or hides the tooltips of all sentences, i.e. the elements matched by
   * ".sentence span.tooltip", by switching their display style between "block"
   * and "none".
   * @param {Boolean} show - Whether the tooltips are to be shown.
   * @memberof content.js
   */
  function showTooltip(show){
    var displayMode = show ? "block" : "none";
    $(".sentence span.tooltip").each(function() {
      this.style.display = displayMode;
    });
  }
  /**
   * Loads the word list containing the 1000 most common words from the file
   * most-common-words_1000.txt of the extension (one word per line).
   * This function must be called before trying to use {annotate()}
   * The file is requested synchronously, because the result is needed as the
   * return value. If the request does not succeed, the returned object is empty.
   * @returns {Object} - An object having every word of the list as a key with the value true.
   * @memberof content.js
   */
  function loadWordList(){
    var wordListArray = [];
    // Only this request is made synchronous; the global ajax defaults of
    // jQuery are left untouched. chrome.runtime.getURL replaces the deprecated
    // chrome.extension.getURL.
    $.ajax({
      url: chrome.runtime.getURL('most-common-words_1000.txt'),
      dataType: 'text',
      async: false,
      success: function(data){
        wordListArray = data.split(/[\r\n]+/);
      }
    });
    var words = {};
    for (var i = 0; i < wordListArray.length; i++) {
      // A leading or trailing line break leaves an empty entry behind, which is not a word.
      if (wordListArray[i].length != 0) {
        words[wordListArray[i]] = true;
      }
    }
    return words;
  }
  // Bootstrap, executed once when this script is evaluated. The word list is
  // loaded first because annotate() looks words up in it; afterwards the page is
  // annotated and the tooltips that annotate() has inserted get their mouse handler.
  430. wordList = loadWordList();
  // Kept at file level: the message listener works on these sentence spans.
  431. var sentenceDoms = annotate();
  432. addToolTipMouseCallback();
  /**
   * Listener function for Chrome messaging system. The meaning of the parameters is documented by Google Chrome's extension documentation.
   * Handled values of request.action:
   * - 'update-score': takes over the influence values of the request and recalculates the combined scores.
   * - 'show-tooltip': accepted, but nothing is done.
   * - 'enable' / 'disable': shows or removes the sentence colors.
   * - 'enable-tooltip' / 'disable-tooltip': shows or hides the tooltips.
   * - 'get-values': passes the current settings to callback as an array.
   * Any other action is ignored.
   * @param {Object} request - The message; its action property selects the operation.
   * @param {Object} sender - Not used.
   * @param {Function} callback - Only called for the action 'get-values'.
   * @memberof content.js
   **/
  function listener(request, sender, callback){
    switch (request.action) {
      case 'update-score':
        averageWordLengthInfluence = parseFloat(request.wordLengthInfluence);
        sentenceLengthInfluence = parseFloat(request.sentenceLengthInfluence);
        ntvRatioInfluence = parseFloat(request.ntvRatioInfluence);
        sentenceComplexityInfluence = parseFloat(request.sentenceComplexityInfluence);
        wordComplexityInfluence = parseFloat(request.wordComplexityInfluence);
        // NOTE(review): colors that are currently shown are not refreshed here - confirm that the sender triggers this.
        updateScores(sentenceDoms);
        break;
      case 'show-tooltip':
        // No handling is implemented for this action.
        break;
      case 'enable':
        showColors(sentenceDoms, true);
        enabled = true;
        if (tooltipEnabled) {
          showTooltip(true);
        }
        break;
      case 'disable':
        showColors(sentenceDoms, false);
        enabled = false;
        // NOTE(review): tooltips are hidden only if they are NOT enabled, which looks inverted - confirm the intent.
        if (!tooltipEnabled) {
          showTooltip(false);
        }
        break;
      case 'enable-tooltip':
        showTooltip(true);
        tooltipEnabled = true;
        break;
      case 'disable-tooltip':
        showTooltip(false);
        tooltipEnabled = false;
        break;
      // This has to be a comparison: an assignment in a condition is always
      // truthy, so it would answer every unknown action and overwrite request.action.
      case 'get-values':
        callback([enabled, tooltipEnabled, sentenceLengthInfluence, averageWordLengthInfluence, ntvRatioInfluence, sentenceComplexityInfluence, wordComplexityInfluence]);
        break;
    }
  }
  474. // Register listener() as the handler for messages that reach this script through Chrome's runtime messaging.
  475. chrome.runtime.onMessage.addListener(listener);