# We attempt to parse C extension files. Basically we look for
# the standard patterns that you find in extensions: <tt>rb_define_class,
# rb_define_method</tt> and so on. We also try to find the corresponding
# C source for the methods and extract comments, but if we fail
# we don't worry too much.
#
# The comments associated with a Ruby method are extracted from the C
# comment block associated with the routine that _implements_ that
# method, that is to say the method whose name is given in the
# <tt>rb_define_method</tt> call. For example, you might write:
#
#  /*
#   * Returns a new array that is a one-dimensional flattening of this
#   * array (recursively). That is, for every element that is an array,
#   * extract its elements into the new array.
#   *
#   *    s = [ 1, 2, 3 ]           #=> [1, 2, 3]
#   *    t = [ 4, 5, 6, [7, 8] ]   #=> [4, 5, 6, [7, 8]]
#   *    a = [ s, t, 9, 10 ]       #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
#   *    a.flatten                 #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
#   */
#   static VALUE
#   rb_ary_flatten(ary)
#       VALUE ary;
#   {
#       ary = rb_obj_dup(ary);
#       rb_ary_flatten_bang(ary);
#       return ary;
#   }
#
#   ...
#
#   void
#   Init_Array()
#   {
#     ...
#     rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
#
# Here RDoc will determine from the rb_define_method line that there's a
# method called "flatten" in class Array, and will look for the implementation
# in the method rb_ary_flatten. It will then use the comment from that
# method in the HTML output. This method must be in the same source file
# as the rb_define_method.
#
# C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc
# integrates C and Ruby source into one tree.

require "rdoc/code_objects"

# Classes and modules built in to the interpreter. We need
# these to define superclasses of user objects.
#
# Each key is the name of the C global variable that holds the
# class or module inside the interpreter; each value is the name of
# that class or module as seen from Ruby. Note that several C
# variable names are abbreviated (rb_eArgError, rb_eZeroDivError,
# rb_eNotImpError, rb_eNoMemError) while the Ruby names are not, and
# that the Ruby class behind rb_eFatal really is spelled "fatal".

KNOWN_CLASSES = {
  "rb_cObject"           => "Object",
  "rb_cArray"            => "Array",
  "rb_cBignum"           => "Bignum",
  "rb_cClass"            => "Class",
  "rb_cDir"              => "Dir",
  "rb_cData"             => "Data",
  "rb_cFalseClass"       => "FalseClass",
  "rb_cFile"             => "File",
  "rb_cFixnum"           => "Fixnum",
  "rb_cFloat"            => "Float",
  "rb_cHash"             => "Hash",
  "rb_cInteger"          => "Integer",
  "rb_cIO"               => "IO",
  "rb_cModule"           => "Module",
  "rb_cNilClass"         => "NilClass",
  "rb_cNumeric"          => "Numeric",
  "rb_cProc"             => "Proc",
  "rb_cRange"            => "Range",
  "rb_cRegexp"           => "Regexp",
  "rb_cString"           => "String",
  "rb_cSymbol"           => "Symbol",
  "rb_cThread"           => "Thread",
  "rb_cTime"             => "Time",
  "rb_cTrueClass"        => "TrueClass",
  "rb_cStruct"           => "Struct",
  "rb_eException"        => "Exception",
  "rb_eStandardError"    => "StandardError",
  "rb_eSystemExit"       => "SystemExit",
  "rb_eInterrupt"        => "Interrupt",
  "rb_eSignal"           => "SignalException",
  "rb_eFatal"            => "fatal",
  "rb_eArgError"         => "ArgumentError",
  "rb_eEOFError"         => "EOFError",
  "rb_eIndexError"       => "IndexError",
  "rb_eRangeError"       => "RangeError",
  "rb_eIOError"          => "IOError",
  "rb_eRuntimeError"     => "RuntimeError",
  "rb_eSecurityError"    => "SecurityError",
  "rb_eSystemCallError"  => "SystemCallError",
  "rb_eTypeError"        => "TypeError",
  "rb_eZeroDivError"     => "ZeroDivisionError",
  "rb_eNotImpError"      => "NotImplementedError",
  "rb_eNoMemError"       => "NoMemoryError",
  "rb_eFloatDomainError" => "FloatDomainError",
  "rb_eScriptError"      => "ScriptError",
  "rb_eNameError"        => "NameError",
  "rb_eSyntaxError"      => "SyntaxError",
  "rb_eLoadError"        => "LoadError",

  "rb_mKernel"           => "Kernel",
  "rb_mComparable"       => "Comparable",
  "rb_mEnumerable"       => "Enumerable",
  "rb_mPrecision"        => "Precision",
  "rb_mErrno"            => "Errno",
  "rb_mFileTest"         => "FileTest",
  "rb_mGC"               => "GC",
  "rb_mMath"             => "Math",
  "rb_mProcess"          => "Process"

}

# See rdoc/c_parse.rb
#
# Scans the text of a C extension source file for the calls that
# define Ruby classes, modules and methods (<tt>rb_define_class</tt>,
# <tt>rb_define_module</tt>, <tt>rb_define_method</tt> and
# <tt>rb_define_singleton_method</tt>) and records what it finds in an
# RDoc::TopLevel object.

class C_Parser

  # Prepare to parse a C file.
  #
  # file_name:: passed to RDoc::TopLevel.new
  # body::      the C source text that will be scanned
  # options::   kept in @options; nothing in this class reads it
  def initialize(file_name, body, options)
    # Work on a copy so classes discovered in this file do not leak
    # into the shared KNOWN_CLASSES table.
    @known_classes = KNOWN_CLASSES.dup
    @body = body
    @options = options
    @top_level = RDoc::TopLevel.new(file_name)
    # C variable name => class/module object created by do_classes
    @classes = {}
  end

  # Extract the classes/modules and methods from a C file
  # and return the corresponding top-level object
  def scan
    do_classes
    do_methods
    @top_level
  end

  #######
  private
  #######

  # Find every <tt>var = rb_define_class("Name", parent)</tt> and
  # <tt>var = rb_define_module("Name")</tt> in the body and register the
  # class or module with the top-level object.
  #
  # The second argument is optional in the pattern because
  # rb_define_module takes only the name; requiring it would mean no
  # module is ever recognised.
  def do_classes
    @body.scan(/(\w+)\s* = \s*rb_define_(class|module)
                \(
                   \s*"(\w+)"\s*
                   (?:,\s*(\w+)\s*)?
                \)/mx) do |var_name, class_mod, class_name, parent|

      # Remember the C variable so later definitions can name it as a
      # parent and so do_methods can attach methods to it.
      @known_classes[var_name] = class_name

      if class_mod == "class"
        # Translate the parent's C variable into its Ruby name when we
        # know it; otherwise fall back to the C variable name itself.
        parent_name = @known_classes[parent] || parent
        cm = @top_level.add_class(RDoc::NormalClass, class_name, "", parent_name)
      else
        cm = @top_level.add_module(RDoc::NormalModule, class_name, "")
      end
      cm.record_location(@top_level)
      @classes[var_name] = cm
    end
  end


  # Find every <tt>rb_define_method</tt> / <tt>rb_define_singleton_method</tt>
  # call and add a method object to the class or module it names.
  #
  # Methods whose receiver was not defined in this file (and so has no
  # entry in @classes) are skipped. The method name may be any quoted
  # string without whitespace, so operators ("+", "[]", "<=>") and
  # setters ("name=") are picked up as well as ordinary names.
  def do_methods
    @body.scan(/rb_define(_singleton)?_method\(\s*(\w+),
                               \s*"([^"\s]+)",
                               \s*(\w+),
                               \s*(-?\w+)\s*\)/xm) do |single, var_name, meth_name, meth_body, param_count|

      class_obj = @classes[var_name]
      next unless class_obj

      meth_obj = RDoc::AnyMethod.new("", meth_name, "")
      # "_singleton" for rb_define_singleton_method, nil otherwise
      meth_obj.singleton = single

      # Placeholder parameter list; find_body replaces it with the real
      # one if the C implementation can be located.
      meth_obj.params = params_for_arity(param_count)

      find_body(meth_body, meth_obj)
      class_obj.add_method(meth_obj)
    end
  end

  # Build a placeholder parameter list from the arity argument of an
  # rb_define_method call. A negative arity, or one that is not an
  # integer literal (a macro or variable name, say), gives "(...)".
  def params_for_arity(param_count)
    count = begin
              Integer(param_count)
            rescue ArgumentError
              -1
            end

    if count < 0
      "(...)"
    elsif count == 0
      "()"
    else
      "(" + (1..count).map { |i| "p#{i}" }.join(", ") + ")"
    end
  end

  # Find the C code corresponding to a c method.
  #
  # Looks for a C comment that is immediately followed by
  # <tt>[static] VALUE meth_name(...)</tt>. When found, the comment
  # (with its C decoration removed) becomes the method's comment, the
  # C parameter list replaces meth_obj.params, and the matched text is
  # stored as a single token. When nothing matches, meth_obj is left
  # untouched.
  def find_body(meth_name, meth_obj)
    if @body =~ %r{((?>/\*.*?\*/\s+))(static\s+)?VALUE\s+#{meth_name}
                    \s*(\(.*?\)).*?^}xm

      comment, params = $1, $3

      meth_obj.comment = mangle_comment(comment)

      meth_obj.params = params
      meth_obj.start_collecting_tokens
      # $& still refers to the match above: the substitutions done in
      # mangle_comment run in that method's own frame.
      meth_obj.add_token(RubyToken::Token.new(1,1).set_text($&))
    end
  end

  # Remove the /*'s and leading asterisks from C comments.
  #
  # Each removed run is replaced by the same number of spaces so the
  # remaining text keeps its column alignment. The string is modified
  # in place and also returned.
  def mangle_comment(comment)
    comment.sub!(%r{/\*+}) { " " * $&.length }
    comment.sub!(%r{\*+/}) { " " * $&.length }
    comment.gsub!(/^[ \t]*\*/m) { " " * $&.length }
    comment
  end

end
