Skip to content
This repository has been archived by the owner on Jun 13, 2024. It is now read-only.

Improve the algorithm to match case when replacing. #51

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 69 additions & 10 deletions substitutions/js/substitutions.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,79 @@
// Icon and idea are from www.xkcd.com/1288
chrome.runtime.sendMessage("config", function(response) {
"use strict";
/**
 * Re-case `replacement` so that it follows the capitalization style of
 * `original` (the text being replaced).
 *
 * The style of `original` is classified by counting, character by
 * character, how far it is from its all-lowercase, Title Case and
 * ALL-UPPERCASE forms (a Hamming distance); the closest form wins.
 * Ties are resolved in favour of lowercase first, then uppercase, then
 * title case.
 *
 *   - lowercase: `replacement` is returned as written (so proper names
 *     already capitalized in it survive), except that its first character
 *     is upper-cased when `original` starts with a capital letter.
 *   - uppercase: `replacement` is upper-cased as a whole.
 *   - title case: the first character of `replacement`, and every
 *     character that follows a non-letter, is upper-cased; all other
 *     characters are left as written.
 *
 * A "letter" is approximated as any character whose upper- and lowercase
 * forms differ. That misclassifies characters of scripts without case,
 * and treats an apostrophe as a word break, but is adequate for this
 * heuristic.
 *
 * @param {string} replacement - Text to insert, as written in the config.
 * @param {string} original - Matched text whose case style is imitated.
 * @returns {string} `replacement`, re-cased. Returned unchanged when
 *     either argument is the empty string.
 */
function matchCase(replacement, original) {
    // Nothing to re-case, or no case pattern to imitate. This also keeps
    // the `replacement[0]` access below from hitting `undefined`.
    if (replacement.length === 0 || original.length === 0) {
        return replacement;
    }

    var hammingLower = 0;
    var hammingTitle = 0;
    var hammingUpper = 0;
    var isTitlePosition = true;
    var i, origChar, lowerChar, upperChar, titleChar;

    for (i = 0; i < original.length; i++) {
        origChar = original[i];
        // Convert one character at a time: whole-string case conversion
        // can change the string's length (e.g. "ß" -> "SS"), which would
        // misalign an index-by-index comparison.
        lowerChar = origChar.toLocaleLowerCase();
        upperChar = origChar.toLocaleUpperCase();
        titleChar = isTitlePosition ? upperChar : lowerChar;
        if (origChar !== lowerChar) {
            hammingLower++;
        }
        if (origChar !== upperChar) {
            hammingUpper++;
        }
        if (origChar !== titleChar) {
            hammingTitle++;
        }
        // The character after a non-letter starts a new word.
        isTitlePosition = (upperChar === lowerChar);
    }

    if (hammingLower <= hammingTitle && hammingLower <= hammingUpper) {
        // Mostly lowercase. If it nevertheless starts with a capital
        // letter we are probably at the start of a sentence. The second
        // comparison excludes digits and punctuation, which equal their
        // own uppercase form without being capitals.
        var firstChar = original[0];
        var startsWithCapital =
            firstChar === firstChar.toLocaleUpperCase() &&
            firstChar !== firstChar.toLocaleLowerCase();
        if (startsWithCapital) {
            return replacement[0].toLocaleUpperCase() + replacement.slice(1);
        }
        return replacement;
    }

    if (hammingUpper <= hammingLower && hammingUpper <= hammingTitle) {
        // Mostly uppercase.
        return replacement.toLocaleUpperCase();
    }

    // Mostly title case. A one-word original that merely starts a
    // sentence also ends up here; it cannot be told apart from a title.
    var pieces = [];
    var replChar, replUpper;
    isTitlePosition = true;
    for (i = 0; i < replacement.length; i++) {
        replChar = replacement[i];
        replUpper = replChar.toLocaleUpperCase();
        pieces.push(isTitlePosition ? replUpper : replChar);
        isTitlePosition = (replChar.toLocaleLowerCase() === replUpper);
    }
    return pieces.join('');
}

var substitute = (function() {
"use strict";
var replacements, ignore, i, replacementsObject, original;
Expand Down