class RDoc::Generator::Spellcheck

A spell checking generator for RDoc.

This generator creates a report of misspelled words. You can use it to find when you acidentally make a typo. For example, this line contains one.

Constants

DEFAULT_WORDS

A list of common words that aspell may not include, but that are commonly used in Ruby programs.

SpellLanguage

OptionParser validator for Aspell language dictionaries

VERSION

This version of rdoc-spellcheck

Public Class Methods

new(options) click to toggle source
# File lib/rdoc/generator/spellcheck.rb, line 424
##
# Builds a spellcheck generator from the RDoc +options+.
#
# Copies the encoding, aggregate flag, minimum word length and source
# directory out of +options+, starts with an empty misspelling tally, and
# creates the Aspell checker for the configured language.  When
# spell_add_words holds a list, each word is added to the personal word
# list and the word lists are saved.

def initialize options # :not-new:
  @options = options

  @encoding            = @options.encoding
  @aggregate_all       = @options.spell_aggregate_all
  @minimum_word_length = @options.spell_minimum_word_length
  @source_dir          = @options.spell_source_dir

  # word => number of times it was reported
  @misspellings = Hash.new 0

  @spell = Aspell.new @options.spell_language, nil, nil, @encoding.name
  @spell.suggestion_mode = Aspell::NORMAL
  @spell.set_option 'run-together', 'true'

  extra_words = @options.spell_add_words

  # setup_options leaves this as false when no words were supplied
  return unless extra_words

  extra_words.each { |extra| @spell.add_to_personal extra }

  @spell.save_all_word_lists
end
setup_options(options) click to toggle source

Adds rdoc-spellcheck options to the rdoc command

# File lib/rdoc/generator/spellcheck.rb, line 355
##
# Adds rdoc-spellcheck options to the rdoc command.
#
# Sets the spellcheck defaults on +options+ and registers the --spell-*
# switches on its option parser.
#
# The default language is the part of the LANG environment variable before
# the first '.'.  When LANG is unset or empty, 'en_US' is used instead of
# failing.

def self.setup_options options
  default_language = ENV['LANG'].to_s.split('.').first || 'en_US'

  options.spell_add_words           = false
  options.spell_language            = default_language
  options.spell_minimum_word_length = 4
  options.spell_source_dir          = Dir.pwd
  options.quiet                     = true # suppress statistics

  op = options.option_parser

  # Only accept languages for which Aspell reports an installed dictionary
  op.accept SpellLanguage do |language|
    found = Aspell.list_dicts.find do |dict|
      dict.name == language
    end

    raise OptionParser::InvalidArgument,
          "dictionary #{language} not installed" unless found

    language
  end

  op.separator nil
  op.separator 'Spellcheck options:'
  op.separator nil

  op.on('--spell-add-words [WORDLIST]',
        'Adds words to the aspell personal wordlist.',
        'The word list may be a comma-separated',
        'list of words which must contain multiple',
        'words, a file or empty to read words from',
        'stdin') do |wordlist|
    words = if wordlist.nil? then
              $stdin.read.split
            elsif wordlist.include? ',' then
              wordlist.split ','
            else
              # File.read instead of Kernel#open: a value starting with "|"
              # must be treated as a file name, never run as a command
              File.read(wordlist).split
            end

    options.spell_add_words = words
  end

  op.separator nil

  op.on('--[no-]spell-aggregate-all',
        'Show aggregate counts for all misspellings.') do |aggregate_all|
    options.spell_aggregate_all = aggregate_all
  end

  op.separator nil

  op.on('--spell-language=LANGUAGE', SpellLanguage,
        'Language to use for spell checking.',
        "The default language is #{default_language}") do |language|
    options.spell_language = language
  end

  op.separator nil

  op.on('--spell-minimum-word-length=LENGTH', Integer,
        'Minimum length of a word to spell check.',
        "The default is #{options.spell_minimum_word_length}") do |length|
    options.spell_minimum_word_length = length
  end
end

Public Instance Methods

add_name(name) click to toggle source

Adds name to the dictionary, splitting the word on ‘_’ (a character Aspell does not allow)

# File lib/rdoc/generator/spellcheck.rb, line 451
##
# Adds +name+ to the session dictionary.
#
# Each run of ASCII letters in +name+ is added as its own word, so '_' (a
# character Aspell does not allow) and any other non-letter act as
# separators.  Matching is case-insensitive so capitalized names such as
# class and constant names are added whole.

def add_name name
  name.scan(/[a-z]+/i) do |part|
    @spell.add_to_session part
  end
end
find_misspelled(comment) click to toggle source

Returns a report of misspelled words in comment. The report contains each misspelled word and its offset in the comment’s text.

# File lib/rdoc/generator/spellcheck.rb, line 461
##
# Returns a report of misspelled words in +comment+.
#
# The report is an Array of [word, offset] pairs, one per misspelled word
# in comment.text.  Words shorter than the minimum word length are skipped
# and a trailing "'s" is removed before the word is checked.  Every
# reported word is also counted in the aggregate misspelling tally.

def find_misspelled comment
  report = []

  # a word is letters and apostrophes that starts and ends with a letter
  comment.text.scan(/\p{L}[\p{L}']+\p{L}/) do |word|
    match = Regexp.last_match

    next if match[0].length < @minimum_word_length

    # number of characters before the word in the comment text
    offset = match.pre_match.length

    # check possessives ("Ruby's") by their base word
    word = word[0...-2] if word.end_with? "'s"

    next if @spell.check word

    # NOTE(review): every offset except 0 is reported one character past
    # the start of the word; the other methods in this class are handed
    # this value unchanged, so it is preserved here -- confirm it is
    # intentional
    offset = offset.zero? ? 0 : offset + 1

    report << [word, offset]

    @misspellings[word] += 1
  end

  report
end
generate(files) click to toggle source

Creates the spelling report

# File lib/rdoc/generator/spellcheck.rb, line 485
##
# Creates the spelling report and prints it to standard output.
#
# The session dictionary is set up first.  Then the comments of every class
# and module (plus their includes, constants, attributes and methods) and
# of every file are checked.  When something was misspelled the detailed
# report is printed, followed by the aggregate counts (the ten most
# frequent unless all were requested) and the total.

def generate files
  setup_dictionary

  lines = []

  RDoc::TopLevel.all_classes_and_modules.each do |klass|
    klass.comment_location.each do |comment, location|
      lines.concat misspellings_for(klass.definition, comment, location)
    end

    klass.each_include do |inc|
      label = "#{inc.parent.full_name}.include #{inc.name}"

      lines.concat misspellings_for(label, inc.comment, inc.file)
    end

    klass.each_constant do |constant|
      # TODO add missing RDoc::Constant#full_name
      owner = constant.parent ? constant.parent.full_name : '(unknown)'
      label = "#{owner}::#{constant.name}"

      lines.concat misspellings_for(label, constant.comment, constant.file)
    end

    klass.each_attribute do |attribute|
      label =
        "#{attribute.parent.full_name}.#{attribute.definition} :#{attribute.name}"

      lines.concat misspellings_for(label, attribute.comment, attribute.file)
    end

    klass.each_method do |method|
      lines.concat misspellings_for(method.full_name, method.comment,
                                    method.file)
    end
  end

  RDoc::TopLevel.all_files.each do |top_level|
    lines.concat misspellings_for(nil, top_level.comment, top_level)
  end

  if @misspellings.empty? then
    puts 'No misspellings found'
    return
  end

  puts lines.join("\n")
  puts

  counts = @misspellings.values
  width  = counts.max.to_s.length

  # most frequent first, ties in alphabetical order
  ranked = @misspellings.sort_by { |word, count| [-count, word] }
  ranked = ranked.take 10 unless @aggregate_all

  puts 'Aggregate misspellings:'
  ranked.each do |word, count|
    puts format("%*d %s", width, count, word)
  end

  puts
  puts "Total misspellings: #{counts.inject :+}"
end
location_of(text, offset, file) click to toggle source

Determines the line and column of the misspelling in comment at offset in the file.

# File lib/rdoc/generator/spellcheck.rb, line 553
##
# Determines the line and column of the misspelling in the comment +text+
# at +offset+ in +file+.
#
# The part of the comment line from its start through +offset+ is searched
# for, as a literal string, in each line of the file read from the source
# directory.  Returns the index of the first line containing it and the
# column just past the searched text, or nil when no line contains it.
#
# Raises a RuntimeError when the file cannot be read.

def location_of text, offset, file
  last_newline = text[0, offset].rindex "\n"
  start_of_line = last_newline ? last_newline + 1 : 0

  line_text = text[start_of_line..offset]

  full_path = File.expand_path file.absolute_name, @source_dir

  file_content = RDoc::Encoding.read_file full_path, @encoding

  raise "[bug] Unable to read #{full_path}" unless file_content

  # NOTE(review): index counts lines from zero -- confirm callers expect a
  # zero-based line number
  file_content.each_line.with_index do |line, index|
    # literal substring search; no regexp escaping needed
    found_at = line.index line_text

    next unless found_at

    return index, found_at + line_text.length
  end

  # TODO typos in include file

  nil
end
misspellings_for(name, comment, location) click to toggle source

Returns a report of misspellings in the comment at location for documentation item name

# File lib/rdoc/generator/spellcheck.rb, line 581
##
# Returns a report of misspellings in the +comment+ at +location+ for
# documentation item +name+.
#
# The report is an Array of output lines: a heading naming the item (or
# only the file when +name+ is nil), a nil separator, then for every
# misspelled word its position and suggestion text.  An empty Array is
# returned when the comment is empty or has no misspellings.

def misspellings_for name, comment, location
  out = []

  return out if comment.empty?

  misspelled = find_misspelled comment

  return out if misspelled.empty?

  if name then
    out << "#{name} in #{location.full_name}:"
  else
    out << "In #{location.full_name}:"
  end

  out << nil

  out.concat misspelled.flat_map { |word, offset|
    suggestion = suggestion_text comment.text, word, offset

    # offset is relative to the comment text, so the comment text (not the
    # word) is what must be located in the file
    line, column = location_of comment.text, offset, location

    if line then
      ["#{location.absolute_name}:#{line}:#{column}", suggestion]
    else
      ["(via include)", suggestion]
    end
  }

  out
end
setup_dictionary() click to toggle source

Adds file names, class names, module names, method names, etc. from the documentation tree to the session spelling dictionary.

# File lib/rdoc/generator/spellcheck.rb, line 616
##
# Adds the default words and the file names, class names, module names,
# method names, etc. from the documentation tree to the session spelling
# dictionary.

def setup_dictionary
  DEFAULT_WORDS.each do |word|
    add_name word
  end

  RDoc::TopLevel.all_classes_and_modules.each do |mod|
    add_name mod.name

    mod.each_include do |incl|
      add_name incl.name
    end

    mod.each_constant do |const|
      add_name const.name
    end

    mod.each_attribute do |attr|
      add_name attr.name
    end

    mod.each_method do |meth|
      add_name meth.name
      add_name meth.params       if meth.params
      add_name meth.block_params if meth.block_params
    end
  end

  RDoc::TopLevel.all_files.each do |file|
    # every directory, base name and extension is added separately
    file.absolute_name.split(%r{[/\\.]}).each do |part|
      add_name part
    end
  end
end
suggestion_text(text, word, offset) click to toggle source

Creates suggestion text for the misspelled word at offset in text

# File lib/rdoc/generator/spellcheck.rb, line 653
  ##
  # Creates suggestion text for the misspelled +word+ at +offset+ in +text+.
  #
  # The result shows the word highlighted in bold red with up to ten
  # characters of context on each side ('...' marks context that was cut
  # off), followed by up to five spelling suggestions.
  #
  # If the text cannot be built, the error, word, offset and text are
  # written to standard error and the exception is raised again.

  def suggestion_text text, word, offset
    prefix = [offset - 10, 0].max

    # /m lets "." cross newlines so the context window also works for words
    # that are not on the first line of a comment.  The context groups are
    # flexible in length, so +offset+ only needs to be near the word.
    context =
      /\A.{#{prefix}}(.{0,10})#{Regexp.escape word}(.{0,10})/m.match text

    before = "#{prefix.zero? ? nil : '...'}#{context[1]}"
    after  = "#{context[2]}#{context[2].length < 10 ? nil : '...'}"

    highlight = "\e[1;31m#{word}\e[m"

    suggestions = @spell.suggest(word).first 5

    "\"#{before}#{highlight}#{after}\"\n\n" \
      "\"#{word}\" suggestions:\n" \
      "\t#{suggestions.join ', '}\n\n"
  rescue => e
    $stderr.puts "[bug] #{e.class}: #{e.message}"
    $stderr.puts
    $stderr.puts "word:   #{word}"
    $stderr.puts "offset: #{offset}"
    $stderr.puts ">>>> start text <<<<\n#{text}\n>>>>> end text <<<<<"
    raise
  end