human (Dec 14, 2024, 11:08 AM)
The duplicate word check didn't spot this:
developers
developers.
Because of the .
in the second one
paste.txt
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Writing Style Analyzer</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
line-height: 1.6;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 800px;
margin: 0 auto;
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
h1 {
margin-top: 0;
color: #333;
}
textarea {
width: 100%;
height: 200px;
padding: 12px;
border: 1px solid #ddd;
border-radius: 4px;
margin-bottom: 20px;
font-size: 16px;
font-family: inherit;
}
.results {
margin-top: 20px;
}
.category {
margin-bottom: 20px;
padding: 15px;
background: #f8f9fa;
border-radius: 4px;
}
.category h2 {
margin-top: 0;
color: #444;
font-size: 1.2em;
}
.highlight {
background: #ffd700;
padding: 2px 4px;
border-radius: 2px;
}
.warning {
color: #856404;
background-color: #fff3cd;
border: 1px solid #ffeeba;
padding: 10px;
margin-bottom: 10px;
border-radius: 4px;
}
</style>
</head>
<body>
<div class="container">
<h1>Writing Style Analyzer</h1>
<p>Adapted from <a href="https://matt.might.net/articles/shell-scripts-for-passive-voice-weasel-words-duplicates/">these shell scripts</a> published by Matt Might.</p>
<p>Paste your text below to check for weasel words, passive voice, and duplicate words:</p>
<textarea id="input" placeholder="Enter your text here..."></textarea>
<div id="results" class="results"></div>
</div>
<script type="module">
// Weasel words from the bash script
const weaselWords = [
'many', 'various', 'very', 'fairly', 'several', 'extremely',
'exceedingly', 'quite', 'remarkably', 'few', 'surprisingly',
'mostly', 'largely', 'huge', 'tiny', 'excellent', 'interestingly',
'significantly', 'substantially', 'clearly', 'vast', 'relatively',
'completely'
];
// Common irregular verbs for passive voice detection
const irregularVerbs = [
'awoken', 'been', 'born', 'beat', 'become', 'begun', 'bent',
'bound', 'bitten', 'bled', 'blown', 'broken', 'brought',
'built', 'burnt', 'bought', 'caught', 'chosen', 'come',
'dealt', 'done', 'drawn', 'driven', 'eaten', 'fallen',
'fought', 'found', 'flown', 'forgotten', 'given', 'gone',
'grown', 'hung', 'heard', 'hidden', 'held', 'kept', 'known',
'laid', 'led', 'left', 'lost', 'made', 'meant', 'met', 'paid',
'put', 'read', 'run', 'said', 'seen', 'sold', 'sent', 'set',
'shown', 'shut', 'sung', 'sat', 'slept', 'spoken', 'spent',
'stood', 'taken', 'taught', 'told', 'thought', 'thrown',
'understood', 'worn', 'won', 'written'
];
// Helper function to get word positions with their original text
function getWordPositions(text) {
const words = [];
const regex = /\S+/g;
let match;
while ((match = regex.exec(text)) !== null) {
words.push({
word: match[0],
index: match.index,
length: match[0].length
});
}
return words;
}
function getContext(text, wordPosition, prevWords = 3) {
const allWords = text.split(/\s+/);
// Find which word index we're at
let currentWordIndex = 0;
let currentPos = 0;
while (currentPos < wordPosition.index && currentWordIndex < allWords.length) {
currentPos += allWords[currentWordIndex].length;
// Account for the space after the word
if (currentPos < text.length) currentPos++;
currentWordIndex++;
}
// Get the previous N words and next few words
const start = Math.max(0, currentWordIndex - prevWords);
const end = Math.min(allWords.length, currentWordIndex + 4);
return allWords.slice(start, end).join(' ');
}
function findWeaselWords(text) {
const results = [];
const wordPositions = getWordPositions(text);
wordPositions.forEach(pos => {
if (weaselWords.includes(pos.word.toLowerCase())) {
results.push({
word: pos.word,
context: getContext(text, pos)
});
}
});
return results;
}
function findPassiveVoice(text) {
const results = [];
const beVerbs = ['am', 'is', 'are', 'was', 'were', 'be', 'been', 'being'];
const wordPositions = getWordPositions(text);
for (let i = 0; i < wordPositions.length - 1; i++) {
const currentWord = wordPositions[i].word.toLowerCase();
const nextWord = wordPositions[i + 1].word.toLowerCase();
if (beVerbs.includes(currentWord)) {
if (nextWord.endsWith('ed') || irregularVerbs.includes(nextWord)) {
results.push({
construction: `${currentWord} ${nextWord}`,
context: getContext(text, wordPositions[i])
});
}
}
}
return results;
}
function findDuplicateWords(text) {
const results = [];
const wordPositions = getWordPositions(text);
for (let i = 1; i < wordPositions.length; i++) {
if (wordPositions[i].word.toLowerCase() === wordPositions[i - 1].word.toLowerCase()) {
results.push({
word: wordPositions[i].word,
context: getContext(text, wordPositions[i])
});
}
}
return results;
}
function displayResults(weasels, passives, duplicates) {
const resultsDiv = document.getElementById('results');
resultsDiv.innerHTML = '';
// Weasel Words
const weaselDiv = document.createElement('div');
weaselDiv.className = 'category';
weaselDiv.innerHTML = `
<h2>Weasel Words</h2>
${weasels.length === 0 ? 'No weasel words found.' :
weasels.map(w => `
<div class="warning">
Found "<span class="highlight">${w.word}</span>" in: "${w.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(weaselDiv);
// Passive Voice
const passiveDiv = document.createElement('div');
passiveDiv.className = 'category';
passiveDiv.innerHTML = `
<h2>Passive Voice</h2>
${passives.length === 0 ? 'No passive voice constructions found.' :
passives.map(p => `
<div class="warning">
Found passive voice "<span class="highlight">${p.construction}</span>" in: "${p.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(passiveDiv);
// Duplicate Words
const duplicateDiv = document.createElement('div');
duplicateDiv.className = 'category';
duplicateDiv.innerHTML = `
<h2>Duplicate Words</h2>
${duplicates.length === 0 ? 'No duplicate words found.' :
duplicates.map(d => `
<div class="warning">
Found duplicate word "<span class="highlight">${d.word}</span>" in: "${d.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(duplicateDiv);
}
// Set up event listener
document.getElementById('input').addEventListener('input', (e) => {
const text = e.target.value;
const weasels = findWeaselWords(text);
const passives = findPassiveVoice(text);
const duplicates = findDuplicateWords(text);
displayResults(weasels, passives, duplicates);
});
</script>
</body>
</html>
assistant (Dec 14, 2024, 11:08 AM)
I understand you'd like to improve the duplicate word detection to handle cases where one word has punctuation. Let me modify the code to strip punctuation before comparison while still preserving the original words for display.
function stripPunctuation(word) {
// Remove all punctuation from the word, preserving internal hyphens
return word.replace(/^[^\w\s-]+|[^\w\s-]+$/g, '');
}
function findDuplicateWords(text) {
const results = [];
const wordPositions = getWordPositions(text);
for (let i = 1; i < wordPositions.length; i++) {
const currentWordStripped = stripPunctuation(wordPositions[i].word.toLowerCase());
const prevWordStripped = stripPunctuation(wordPositions[i - 1].word.toLowerCase());
if (currentWordStripped === prevWordStripped && currentWordStripped.length > 0) {
results.push({
word: wordPositions[i].word,
context: getContext(text, wordPositions[i])
});
}
}
return results;
}
This updated version:
- Adds a
stripPunctuation()
function that removes punctuation from the start and end of words while preserving internal hyphens - Modifies
findDuplicateWords()
to compare the stripped versions of words - Includes a length check to avoid matching empty strings if all characters were stripped
Now it will catch duplicates like:
- "developers developers."
- "word, word"
- "test! test"
- "example... example"
While still preserving compound words with hyphens and showing the original words (with punctuation) in the results.
Would you like me to show you how to integrate this into your HTML file, or would you like to test any specific cases?
human (Dec 14, 2024, 11:08 AM)
output the full artifact
assistant (Dec 14, 2024, 11:09 AM)
I'll provide the complete updated HTML file with the improved duplicate word detection.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Writing Style Analyzer</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
line-height: 1.6;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 800px;
margin: 0 auto;
background: white;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
h1 {
margin-top: 0;
color: #333;
}
textarea {
width: 100%;
height: 200px;
padding: 12px;
border: 1px solid #ddd;
border-radius: 4px;
margin-bottom: 20px;
font-size: 16px;
font-family: inherit;
}
.results {
margin-top: 20px;
}
.category {
margin-bottom: 20px;
padding: 15px;
background: #f8f9fa;
border-radius: 4px;
}
.category h2 {
margin-top: 0;
color: #444;
font-size: 1.2em;
}
.highlight {
background: #ffd700;
padding: 2px 4px;
border-radius: 2px;
}
.warning {
color: #856404;
background-color: #fff3cd;
border: 1px solid #ffeeba;
padding: 10px;
margin-bottom: 10px;
border-radius: 4px;
}
</style>
</head>
<body>
<div class="container">
<h1>Writing Style Analyzer</h1>
<p>Adapted from <a href="https://matt.might.net/articles/shell-scripts-for-passive-voice-weasel-words-duplicates/">these shell scripts</a> published by Matt Might.</p>
<p>Paste your text below to check for weasel words, passive voice, and duplicate words:</p>
<textarea id="input" placeholder="Enter your text here..."></textarea>
<div id="results" class="results"></div>
</div>
<script type="module">
// Weasel words from the bash script
const weaselWords = [
'many', 'various', 'very', 'fairly', 'several', 'extremely',
'exceedingly', 'quite', 'remarkably', 'few', 'surprisingly',
'mostly', 'largely', 'huge', 'tiny', 'excellent', 'interestingly',
'significantly', 'substantially', 'clearly', 'vast', 'relatively',
'completely'
];
// Common irregular verbs for passive voice detection
const irregularVerbs = [
'awoken', 'been', 'born', 'beat', 'become', 'begun', 'bent',
'bound', 'bitten', 'bled', 'blown', 'broken', 'brought',
'built', 'burnt', 'bought', 'caught', 'chosen', 'come',
'dealt', 'done', 'drawn', 'driven', 'eaten', 'fallen',
'fought', 'found', 'flown', 'forgotten', 'given', 'gone',
'grown', 'hung', 'heard', 'hidden', 'held', 'kept', 'known',
'laid', 'led', 'left', 'lost', 'made', 'meant', 'met', 'paid',
'put', 'read', 'run', 'said', 'seen', 'sold', 'sent', 'set',
'shown', 'shut', 'sung', 'sat', 'slept', 'spoken', 'spent',
'stood', 'taken', 'taught', 'told', 'thought', 'thrown',
'understood', 'worn', 'won', 'written'
];
// Helper function to strip punctuation from words
function stripPunctuation(word) {
// Remove all punctuation from the word, preserving internal hyphens
return word.replace(/^[^\w\s-]+|[^\w\s-]+$/g, '');
}
// Helper function to get word positions with their original text
function getWordPositions(text) {
const words = [];
const regex = /\S+/g;
let match;
while ((match = regex.exec(text)) !== null) {
words.push({
word: match[0],
index: match.index,
length: match[0].length
});
}
return words;
}
function getContext(text, wordPosition, prevWords = 3) {
const allWords = text.split(/\s+/);
// Find which word index we're at
let currentWordIndex = 0;
let currentPos = 0;
while (currentPos < wordPosition.index && currentWordIndex < allWords.length) {
currentPos += allWords[currentWordIndex].length;
// Account for the space after the word
if (currentPos < text.length) currentPos++;
currentWordIndex++;
}
// Get the previous N words and next few words
const start = Math.max(0, currentWordIndex - prevWords);
const end = Math.min(allWords.length, currentWordIndex + 4);
return allWords.slice(start, end).join(' ');
}
function findWeaselWords(text) {
const results = [];
const wordPositions = getWordPositions(text);
wordPositions.forEach(pos => {
const strippedWord = stripPunctuation(pos.word.toLowerCase());
if (weaselWords.includes(strippedWord)) {
results.push({
word: pos.word,
context: getContext(text, pos)
});
}
});
return results;
}
function findPassiveVoice(text) {
const results = [];
const beVerbs = ['am', 'is', 'are', 'was', 'were', 'be', 'been', 'being'];
const wordPositions = getWordPositions(text);
for (let i = 0; i < wordPositions.length - 1; i++) {
const currentWord = stripPunctuation(wordPositions[i].word.toLowerCase());
const nextWord = stripPunctuation(wordPositions[i + 1].word.toLowerCase());
if (beVerbs.includes(currentWord)) {
if (nextWord.endsWith('ed') || irregularVerbs.includes(nextWord)) {
results.push({
construction: `${wordPositions[i].word} ${wordPositions[i + 1].word}`,
context: getContext(text, wordPositions[i])
});
}
}
}
return results;
}
function findDuplicateWords(text) {
const results = [];
const wordPositions = getWordPositions(text);
for (let i = 1; i < wordPositions.length; i++) {
const currentWordStripped = stripPunctuation(wordPositions[i].word.toLowerCase());
const prevWordStripped = stripPunctuation(wordPositions[i - 1].word.toLowerCase());
if (currentWordStripped === prevWordStripped && currentWordStripped.length > 0) {
results.push({
word: wordPositions[i].word,
context: getContext(text, wordPositions[i])
});
}
}
return results;
}
function displayResults(weasels, passives, duplicates) {
const resultsDiv = document.getElementById('results');
resultsDiv.innerHTML = '';
// Weasel Words
const weaselDiv = document.createElement('div');
weaselDiv.className = 'category';
weaselDiv.innerHTML = `
<h2>Weasel Words</h2>
${weasels.length === 0 ? 'No weasel words found.' :
weasels.map(w => `
<div class="warning">
Found "<span class="highlight">${w.word}</span>" in: "${w.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(weaselDiv);
// Passive Voice
const passiveDiv = document.createElement('div');
passiveDiv.className = 'category';
passiveDiv.innerHTML = `
<h2>Passive Voice</h2>
${passives.length === 0 ? 'No passive voice constructions found.' :
passives.map(p => `
<div class="warning">
Found passive voice "<span class="highlight">${p.construction}</span>" in: "${p.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(passiveDiv);
// Duplicate Words
const duplicateDiv = document.createElement('div');
duplicateDiv.className = 'category';
duplicateDiv.innerHTML = `
<h2>Duplicate Words</h2>
${duplicates.length === 0 ? 'No duplicate words found.' :
duplicates.map(d => `
<div class="warning">
Found duplicate word "<span class="highlight">${d.word}</span>" in: "${d.context}"
</div>
`).join('')}
`;
resultsDiv.appendChild(duplicateDiv);
}
// Set up event listener
document.getElementById('input').addEventListener('input', (e) => {
const text = e.target.value;
const weasels = findWeaselWords(text);
const passives = findPassiveVoice(text);
const duplicates = findDuplicateWords(text);
displayResults(weasels, passives, duplicates);
});
</script>
</body>
</html>
The key changes from the original version include:
- Added the
stripPunctuation()
function to handle punctuation in word comparisons - Updated
findDuplicateWords()
to use the stripped versions of words for comparison - Also applied
stripPunctuation()
to the weasel words and passive voice checks for consistency - Added extra validation to ensure we don't match empty strings after stripping punctuation
The analyzer will now catch duplicate words even when they have different punctuation, while still showing the original text (with punctuation) in the results. Would you like to test it with any specific examples?
Diff: simonw/tools@0fbf511
Now deployed here: https://tools.simonwillison.net/writing-style