Reply
 
Thread Tools Display Modes
  #1   Report Post  
Rhen
 
Posts: n/a
Default How can I find if there are doubles in my list of words?

For example if I have 1000+ words on my word document, is there a way I can
automatically scan the document to find if there are any doubles in there?

Thanks in advance !

Rhen
  #2   Report Post  
Greg Maxey
 
Posts: n/a
Default

You can use this macro to determine word frequency.

Sub WordFrequency()

Dim SingleWord As String 'Raw word pulled from doc
Const maxwords = 9000 'Maximum unique words allowed
Dim Words(maxwords) As String 'Array to hold unique words
Dim Freq(maxwords) As Integer 'Frequency counter for Unique Words
Dim WordNum As Integer 'Number of unique words
Dim ByFreq As Boolean 'Flag for sorting order
Dim ttlwds As Long 'Total words in the document
Dim Excludes As String 'Words to be excluded
Dim Found As Boolean 'Temporary flag
Dim j, k, l, Temp As Integer 'Temporary variables
Dim IngWordCount As Long 'Total non-excluded words in document
Dim NonWordObjects As Long
Dim AllWordOjects As Long
Dim TotalWords As Long
Dim tword As String '

'Set up excluded words
'Excludes = "[pickleloaf][gruntbutter]"
'Excludes = Excludes & InputBox$("The following words are excluded by
default: " & Excludes & ". Enter additional words that you wish to exclude,
surrounding each word with [ ].", "Excluded Words", "")
Excludes = InputBox$("Enter words that you wish to exclude. " _
& "Place each word within square brackets [ ]. " _
& "Example: [is][a].", "Excluded Words", "")

'Find out how to sort
ByFreq = True
Ans = InputBox$("Default sort order is word freqeuncy. To sort
alphabetically by word, type Word in the field below.", "Sort order",
"FREQ")
If Ans = "" Then End
If UCase(Ans) = "WORD" Then
ByFreq = False
End If

Selection.HomeKey Unit:=wdStory
System.Cursor = wdCursorWait
WordNum = 0
ttlwds = ActiveDocument.Words.Count
'AllWordObjects = ActiveDocument.Words.Count
'TotalWords = NonWordObjects

'Control the repeat
For Each aword In ActiveDocument.Words
SingleWord = Trim(LCase(aword))
If SingleWord "a" Or SingleWord "z" Then SingleWord = "" 'Out of range?
If SingleWord "a" Or SingleWord "z" Then NonWordObjects = NonWordObjects
+ 1
'SingleWord = Trim(aword)
'If SingleWord "A" Or SingleWord "z" Then SingleWord = "" 'Out of range?
If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord = "" 'On exclude
list?
If Len(SingleWord) 0 Then
IngWordCount = IngWordCount + 1
Found = False
For j = 1 To WordNum
If Words(j) = SingleWord Then
Freq(j) = Freq(j) + 1
Found = True
Exit For
End If
Next j
If Not Found Then
WordNum = WordNum + 1
Words(WordNum) = SingleWord
Freq(WordNum) = 1
End If
If WordNum maxwords - 1 Then
j = MsgBox("The maximum array size has been exceeded. Increase
maxwords.", vbOKOnly)
Exit For
End If
End If
ttlwds = ttlwds - 1
StatusBar = "Remaining: " & ttlwds & " Unique: " & WordNum
Next aword

'Now sort it into word order
For j = 1 To WordNum - 1
k = j
For l = j + 1 To WordNum
If (Not ByFreq And Words(l) Words(k)) Or (ByFreq And Freq(l) Freq(k))
Then k = l
Next l
If k j Then
tword = Words(j)
Words(j) = Words(k)
Words(k) = tword
Temp = Freq(j)
Freq(j) = Freq(k)
Freq(k) = Temp
End If
StatusBar = "Sorting: " & WordNum - j
Next j

AllWordObjects = ActiveDocument.Words.Count
NonWordObjects = NonWordObjects
TotalWords = AllWordObjects - NonWordObjects

'Now write out the results
tmpName = ActiveDocument.AttachedTemplate.FullName
Documents.Add Template:=tmpName, NewTemplate:=False
Selection.ParagraphFormat.TabStops.ClearAll
With Selection
For j = 1 To WordNum
..TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
Next j
End With
ActiveDocument.Range.Select
Selection.ConvertToTable
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows.Add BeforeRow:=Selection.Rows(1)
ActiveDocument.Tables(1).Cell(1, 1).Range.InsertBefore "Unique Words"
ActiveDocument.Tables(1).Cell(1, 2).Range.InsertBefore "Number of
Occurrences"
ActiveDocument.Tables(1).Columns(2).Select
Selection.ParagraphFormat.Alignment = wdAlignParagraphRight
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows(1).Shading.Backgroun dPatternColor =
wdColorGray20
ActiveDocument.Tables(1).Columns(1).PreferredWidth = InchesToPoints(4.75)
ActiveDocument.Tables(1).Columns(2).PreferredWidth = InchesToPoints(1.9)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Summary"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore "Total"
ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.BackgroundPatternColor
= wdColorGray20


ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Unique Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore Trim(Str(WordNum))
ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.BackgroundPatternColor
= wdColorAutomatic

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Non-Excluded Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (IngWordCount)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Words (Excluded and Non-Excluded) in
Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (TotalWords)
System.Cursor = wdCursorNormal

MsgBox "This document contains " & Trim(Str(WordNum)) & " unique words. "
MsgBox "This document contains " & IngWordCount & " non-excluded words. "
MsgBox "This document contains a total of " & TotalWords & " (excluded and
non-excluded) words. "
MsgBox "For more statistics on this document, use ToolsWord Count in the
original document. "

Selection.HomeKey wdStory

End Sub

--
Greg Maxey/Word MVP
See:
http://gregmaxey.mvps.org/word_tips.htm
For some helpful tips using Word.

Rhen wrote:
For example if I have 1000+ words on my word document, is there a way
I can automatically scan the document to find if there are any
doubles in there?

Thanks in advance !

Rhen



  #3   Report Post  
Graham Mayor
 
Posts: n/a
Default

Or you could sort the list and use Replace to eliminate the duplicates -
http://www.gmayor.com/replace_using_wildcards.htm

--

Graham Mayor - Word MVP

My web site www.gmayor.com
Word MVP web site http://word.mvps.org




Greg Maxey wrote:
You can use this macro to determine word frequency.

Sub WordFrequency()

Dim SingleWord As String 'Raw word pulled from doc
Const maxwords = 9000 'Maximum unique words allowed
Dim Words(maxwords) As String 'Array to hold unique words
Dim Freq(maxwords) As Integer 'Frequency counter for Unique Words
Dim WordNum As Integer 'Number of unique words
Dim ByFreq As Boolean 'Flag for sorting order
Dim ttlwds As Long 'Total words in the document
Dim Excludes As String 'Words to be excluded
Dim Found As Boolean 'Temporary flag
Dim j, k, l, Temp As Integer 'Temporary variables
Dim IngWordCount As Long 'Total non-excluded words in
document Dim NonWordObjects As Long
Dim AllWordOjects As Long
Dim TotalWords As Long
Dim tword As String '

'Set up excluded words
'Excludes = "[pickleloaf][gruntbutter]"
'Excludes = Excludes & InputBox$("The following words are excluded by
default: " & Excludes & ". Enter additional words that you wish to
exclude, surrounding each word with [ ].", "Excluded Words", "")
Excludes = InputBox$("Enter words that you wish to exclude. " _
& "Place each word within square brackets [ ]. " _
& "Example: [is][a].", "Excluded Words", "")

'Find out how to sort
ByFreq = True
Ans = InputBox$("Default sort order is word freqeuncy. To sort
alphabetically by word, type Word in the field below.", "Sort order",
"FREQ")
If Ans = "" Then End
If UCase(Ans) = "WORD" Then
ByFreq = False
End If

Selection.HomeKey Unit:=wdStory
System.Cursor = wdCursorWait
WordNum = 0
ttlwds = ActiveDocument.Words.Count
'AllWordObjects = ActiveDocument.Words.Count
'TotalWords = NonWordObjects

'Control the repeat
For Each aword In ActiveDocument.Words
SingleWord = Trim(LCase(aword))
If SingleWord "a" Or SingleWord "z" Then SingleWord = "" 'Out of
range? If SingleWord "a" Or SingleWord "z" Then NonWordObjects =
NonWordObjects + 1
'SingleWord = Trim(aword)
'If SingleWord "A" Or SingleWord "z" Then SingleWord = "" 'Out of
range? If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord =
"" 'On exclude list?
If Len(SingleWord) 0 Then
IngWordCount = IngWordCount + 1
Found = False
For j = 1 To WordNum
If Words(j) = SingleWord Then
Freq(j) = Freq(j) + 1
Found = True
Exit For
End If
Next j
If Not Found Then
WordNum = WordNum + 1
Words(WordNum) = SingleWord
Freq(WordNum) = 1
End If
If WordNum maxwords - 1 Then
j = MsgBox("The maximum array size has been exceeded. Increase
maxwords.", vbOKOnly)
Exit For
End If
End If
ttlwds = ttlwds - 1
StatusBar = "Remaining: " & ttlwds & " Unique: " & WordNum
Next aword

'Now sort it into word order
For j = 1 To WordNum - 1
k = j
For l = j + 1 To WordNum
If (Not ByFreq And Words(l) Words(k)) Or (ByFreq And Freq(l)
Freq(k)) Then k = l
Next l
If k j Then
tword = Words(j)
Words(j) = Words(k)
Words(k) = tword
Temp = Freq(j)
Freq(j) = Freq(k)
Freq(k) = Temp
End If
StatusBar = "Sorting: " & WordNum - j
Next j

AllWordObjects = ActiveDocument.Words.Count
NonWordObjects = NonWordObjects
TotalWords = AllWordObjects - NonWordObjects

'Now write out the results
tmpName = ActiveDocument.AttachedTemplate.FullName
Documents.Add Template:=tmpName, NewTemplate:=False
Selection.ParagraphFormat.TabStops.ClearAll
With Selection
For j = 1 To WordNum
.TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
Next j
End With
ActiveDocument.Range.Select
Selection.ConvertToTable
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows.Add BeforeRow:=Selection.Rows(1)
ActiveDocument.Tables(1).Cell(1, 1).Range.InsertBefore "Unique Words"
ActiveDocument.Tables(1).Cell(1, 2).Range.InsertBefore "Number of
Occurrences"
ActiveDocument.Tables(1).Columns(2).Select
Selection.ParagraphFormat.Alignment = wdAlignParagraphRight
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows(1).Shading.Backgroun dPatternColor =
wdColorGray20
ActiveDocument.Tables(1).Columns(1).PreferredWidth =
InchesToPoints(4.75)
ActiveDocument.Tables(1).Columns(2).PreferredWidth =
InchesToPoints(1.9)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Summary"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore "Total"

ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorGray20


ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Unique Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore Trim(Str(WordNum))

ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorAutomatic

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Non-Excluded Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (IngWordCount)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Words (Excluded and Non-Excluded) in
Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (TotalWords)
System.Cursor = wdCursorNormal

MsgBox "This document contains " & Trim(Str(WordNum)) & " unique
words. " MsgBox "This document contains " & IngWordCount & "
non-excluded words. " MsgBox "This document contains a total of " &
TotalWords & " (excluded and non-excluded) words. "
MsgBox "For more statistics on this document, use ToolsWord Count in
the original document. "

Selection.HomeKey wdStory

End Sub


Rhen wrote:
For example if I have 1000+ words on my word document, is there a way
I can automatically scan the document to find if there are any
doubles in there?

Thanks in advance !

Rhen



  #4   Report Post  
Suzanne S. Barnhill
 
Posts: n/a
Default

Or you could sort the list, then glom it all into one paragraph and count on
word to mark the duplicates as "spelling" errors. g

--
Suzanne S. Barnhill
Microsoft MVP (Word)
Words into Type
Fairhope, Alabama USA
Word MVP FAQ site: http://word.mvps.org
Email cannot be acknowledged; please post all follow-ups to the newsgroup so
all may benefit.

"Graham Mayor" wrote in message
...
Or you could sort the list and use Replace to eliminate the duplicates -
http://www.gmayor.com/replace_using_wildcards.htm

--

Graham Mayor - Word MVP

My web site www.gmayor.com
Word MVP web site http://word.mvps.org




Greg Maxey wrote:
You can use this macro to determine word frequency.

Sub WordFrequency()

Dim SingleWord As String 'Raw word pulled from doc
Const maxwords = 9000 'Maximum unique words allowed
Dim Words(maxwords) As String 'Array to hold unique words
Dim Freq(maxwords) As Integer 'Frequency counter for Unique Words
Dim WordNum As Integer 'Number of unique words
Dim ByFreq As Boolean 'Flag for sorting order
Dim ttlwds As Long 'Total words in the document
Dim Excludes As String 'Words to be excluded
Dim Found As Boolean 'Temporary flag
Dim j, k, l, Temp As Integer 'Temporary variables
Dim IngWordCount As Long 'Total non-excluded words in
document Dim NonWordObjects As Long
Dim AllWordOjects As Long
Dim TotalWords As Long
Dim tword As String '

'Set up excluded words
'Excludes = "[pickleloaf][gruntbutter]"
'Excludes = Excludes & InputBox$("The following words are excluded by
default: " & Excludes & ". Enter additional words that you wish to
exclude, surrounding each word with [ ].", "Excluded Words", "")
Excludes = InputBox$("Enter words that you wish to exclude. " _
& "Place each word within square brackets [ ]. " _
& "Example: [is][a].", "Excluded Words", "")

'Find out how to sort
ByFreq = True
Ans = InputBox$("Default sort order is word freqeuncy. To sort
alphabetically by word, type Word in the field below.", "Sort order",
"FREQ")
If Ans = "" Then End
If UCase(Ans) = "WORD" Then
ByFreq = False
End If

Selection.HomeKey Unit:=wdStory
System.Cursor = wdCursorWait
WordNum = 0
ttlwds = ActiveDocument.Words.Count
'AllWordObjects = ActiveDocument.Words.Count
'TotalWords = NonWordObjects

'Control the repeat
For Each aword In ActiveDocument.Words
SingleWord = Trim(LCase(aword))
If SingleWord "a" Or SingleWord "z" Then SingleWord = "" 'Out of
range? If SingleWord "a" Or SingleWord "z" Then NonWordObjects =
NonWordObjects + 1
'SingleWord = Trim(aword)
'If SingleWord "A" Or SingleWord "z" Then SingleWord = "" 'Out of
range? If InStr(Excludes, "[" & SingleWord & "]") Then SingleWord =
"" 'On exclude list?
If Len(SingleWord) 0 Then
IngWordCount = IngWordCount + 1
Found = False
For j = 1 To WordNum
If Words(j) = SingleWord Then
Freq(j) = Freq(j) + 1
Found = True
Exit For
End If
Next j
If Not Found Then
WordNum = WordNum + 1
Words(WordNum) = SingleWord
Freq(WordNum) = 1
End If
If WordNum maxwords - 1 Then
j = MsgBox("The maximum array size has been exceeded. Increase
maxwords.", vbOKOnly)
Exit For
End If
End If
ttlwds = ttlwds - 1
StatusBar = "Remaining: " & ttlwds & " Unique: " & WordNum
Next aword

'Now sort it into word order
For j = 1 To WordNum - 1
k = j
For l = j + 1 To WordNum
If (Not ByFreq And Words(l) Words(k)) Or (ByFreq And Freq(l)
Freq(k)) Then k = l
Next l
If k j Then
tword = Words(j)
Words(j) = Words(k)
Words(k) = tword
Temp = Freq(j)
Freq(j) = Freq(k)
Freq(k) = Temp
End If
StatusBar = "Sorting: " & WordNum - j
Next j

AllWordObjects = ActiveDocument.Words.Count
NonWordObjects = NonWordObjects
TotalWords = AllWordObjects - NonWordObjects

'Now write out the results
tmpName = ActiveDocument.AttachedTemplate.FullName
Documents.Add Template:=tmpName, NewTemplate:=False
Selection.ParagraphFormat.TabStops.ClearAll
With Selection
For j = 1 To WordNum
.TypeText Text:=Words(j) & vbTab & Trim(Str(Freq(j))) & vbCrLf
Next j
End With
ActiveDocument.Range.Select
Selection.ConvertToTable
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows.Add BeforeRow:=Selection.Rows(1)
ActiveDocument.Tables(1).Cell(1, 1).Range.InsertBefore "Unique Words"
ActiveDocument.Tables(1).Cell(1, 2).Range.InsertBefore "Number of
Occurrences"
ActiveDocument.Tables(1).Columns(2).Select
Selection.ParagraphFormat.Alignment = wdAlignParagraphRight
Selection.Collapse wdCollapseStart
ActiveDocument.Tables(1).Rows(1).Shading.Backgroun dPatternColor =
wdColorGray20
ActiveDocument.Tables(1).Columns(1).PreferredWidth =
InchesToPoints(4.75)
ActiveDocument.Tables(1).Columns(2).PreferredWidth =
InchesToPoints(1.9)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Summary"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore "Total"


ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorGray20


ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Unique Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore Trim(Str(WordNum))


ActiveDocument.Tables(1).Rows(ActiveDocument.Table s(1).Rows.Count).Shading.B
ackgroundPatternColor
= wdColorAutomatic

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Non-Excluded Words in Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (IngWordCount)

ActiveDocument.Tables(1).Rows.Add
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
1).Range.InsertBefore "Number of Words (Excluded and Non-Excluded) in
Document"
ActiveDocument.Tables(1).Cell(ActiveDocument.Table s(1).Rows.Count,
2).Range.InsertBefore (TotalWords)
System.Cursor = wdCursorNormal

MsgBox "This document contains " & Trim(Str(WordNum)) & " unique
words. " MsgBox "This document contains " & IngWordCount & "
non-excluded words. " MsgBox "This document contains a total of " &
TotalWords & " (excluded and non-excluded) words. "
MsgBox "For more statistics on this document, use ToolsWord Count in
the original document. "

Selection.HomeKey wdStory

End Sub


Rhen wrote:
For example if I have 1000+ words on my word document, is there a way
I can automatically scan the document to find if there are any
doubles in there?

Thanks in advance !

Rhen




Reply
Thread Tools
Display Modes

Posting Rules

Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump

Similar Threads
Thread Thread Starter Forum Replies Last Post
how do I find repeating words in a list wendyp Microsoft Word Help 5 June 4th 05 06:52 PM
In a Word list style, allow different format on first/last items Anita Page Layout 2 June 2nd 05 12:35 PM
Where do I find a list of office 2003 speech commands? Fred Microsoft Word Help 2 May 27th 05 10:54 PM
Find all instances of a recurring text robfer Microsoft Word Help 2 April 22nd 05 06:02 AM
Find and Replace anomaly BruceM Microsoft Word Help 7 January 18th 05 05:47 PM


All times are GMT +1. The time now is 05:52 PM.

Copyright ©2000 - 2024, Jelsoft Enterprises Ltd.
Copyright ©2004-2024 Microsoft Office Word Forum - WordBanter.
The comments are property of their posters.
 

About Us

"It's about Microsoft Word"