/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#include <stdio.h>
#include <string.h>
#include "common.h"

#ifdef BUILD_KERNEL
#include "kernel_DUNNINGTON.h"
#endif

/* Force DEBUG off for this file: the fprintf traces wrapped in
   #ifdef DEBUG further down are compiled out as a result. */
#undef DEBUG

/* Forward declaration: the table initializer below stores a pointer to
   init_parameter, whose definition appears after the table. */
static void init_parameter(void);

/*
 * Kernel dispatch table for the DUNNINGTON target.
 *
 * The initializer is purely positional (no designators), so the order of the
 * entries must follow the member order of gotoblas_t exactly.  Every
 * #if/#else pair below contributes the same number of entries on both
 * branches, which keeps the later entries in their slots whichever branch
 * is taken.
 *
 * Recurring pattern: when the M and N unroll factors of a GEMM flavour
 * differ, the "i" copy routines (incopy/itcopy, i?n?copy, i?tcopy) are
 * listed; otherwise the corresponding "o" routines are listed in those
 * positions as well, immediately followed by the "o" routines proper.
 */
gotoblas_t TABLE_NAME = {
  DTB_DEFAULT_ENTRIES ,

  GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,

  /* ---- single precision real (s*) ---- */
  /* NOTE(review): the three zeros are presumably the sgemm p/q/r blocking
     slots that init_parameter fills in at run time -- confirm against the
     gotoblas_t declaration. */
  0, 0, 0,
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN
 SGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  /* 1 when HAVE_EXCLUSIVE_CACHE is defined at build time, 0 otherwise. */
#ifdef HAVE_EXCLUSIVE_CACHE
  1,
#else
  0,
#endif

  samax_k_DUNNINGTON,  samin_k_DUNNINGTON,  smax_k_DUNNINGTON,  smin_k_DUNNINGTON,
  isamax_k_DUNNINGTON, isamin_k_DUNNINGTON, ismax_k_DUNNINGTON, ismin_k_DUNNINGTON,
  snrm2_k_DUNNINGTON,  sasum_k_DUNNINGTON,  scopy_k_DUNNINGTON, sdot_k_DUNNINGTON,
  dsdot_k_DUNNINGTON,
  srot_k_DUNNINGTON,   saxpy_k_DUNNINGTON,  sscal_k_DUNNINGTON, sswap_k_DUNNINGTON,
  sgemv_n_DUNNINGTON,  sgemv_t_DUNNINGTON, sger_k_DUNNINGTON,
  ssymv_L_DUNNINGTON, ssymv_U_DUNNINGTON,

  sgemm_kernel_DUNNINGTON, sgemm_beta_DUNNINGTON,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  sgemm_incopy_DUNNINGTON, sgemm_itcopy_DUNNINGTON,
#else
  sgemm_oncopy_DUNNINGTON, sgemm_otcopy_DUNNINGTON,
#endif
  sgemm_oncopy_DUNNINGTON, sgemm_otcopy_DUNNINGTON,
  strsm_kernel_LN_DUNNINGTON, strsm_kernel_LT_DUNNINGTON, strsm_kernel_RN_DUNNINGTON, strsm_kernel_RT_DUNNINGTON,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strsm_iunucopy_DUNNINGTON, strsm_iunncopy_DUNNINGTON, strsm_iutucopy_DUNNINGTON, strsm_iutncopy_DUNNINGTON,
  strsm_ilnucopy_DUNNINGTON, strsm_ilnncopy_DUNNINGTON, strsm_iltucopy_DUNNINGTON, strsm_iltncopy_DUNNINGTON,
#else
  strsm_ounucopy_DUNNINGTON, strsm_ounncopy_DUNNINGTON, strsm_outucopy_DUNNINGTON, strsm_outncopy_DUNNINGTON,
  strsm_olnucopy_DUNNINGTON, strsm_olnncopy_DUNNINGTON, strsm_oltucopy_DUNNINGTON, strsm_oltncopy_DUNNINGTON,
#endif
  strsm_ounucopy_DUNNINGTON, strsm_ounncopy_DUNNINGTON, strsm_outucopy_DUNNINGTON, strsm_outncopy_DUNNINGTON,
  strsm_olnucopy_DUNNINGTON, strsm_olnncopy_DUNNINGTON, strsm_oltucopy_DUNNINGTON, strsm_oltncopy_DUNNINGTON,
  strmm_kernel_RN_DUNNINGTON, strmm_kernel_RT_DUNNINGTON, strmm_kernel_LN_DUNNINGTON, strmm_kernel_LT_DUNNINGTON,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strmm_iunucopy_DUNNINGTON, strmm_iunncopy_DUNNINGTON, strmm_iutucopy_DUNNINGTON, strmm_iutncopy_DUNNINGTON,
  strmm_ilnucopy_DUNNINGTON, strmm_ilnncopy_DUNNINGTON, strmm_iltucopy_DUNNINGTON, strmm_iltncopy_DUNNINGTON,
#else
  strmm_ounucopy_DUNNINGTON, strmm_ounncopy_DUNNINGTON, strmm_outucopy_DUNNINGTON, strmm_outncopy_DUNNINGTON,
  strmm_olnucopy_DUNNINGTON, strmm_olnncopy_DUNNINGTON, strmm_oltucopy_DUNNINGTON, strmm_oltncopy_DUNNINGTON,
#endif
  strmm_ounucopy_DUNNINGTON, strmm_ounncopy_DUNNINGTON, strmm_outucopy_DUNNINGTON, strmm_outncopy_DUNNINGTON,
  strmm_olnucopy_DUNNINGTON, strmm_olnncopy_DUNNINGTON, strmm_oltucopy_DUNNINGTON, strmm_oltncopy_DUNNINGTON,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  ssymm_iutcopy_DUNNINGTON, ssymm_iltcopy_DUNNINGTON,
#else
  ssymm_outcopy_DUNNINGTON, ssymm_oltcopy_DUNNINGTON,
#endif
  ssymm_outcopy_DUNNINGTON, ssymm_oltcopy_DUNNINGTON,

  /* These two slots are NULL when the build defines NO_LAPACK. */
#ifndef NO_LAPACK
  sneg_tcopy_DUNNINGTON, slaswp_ncopy_DUNNINGTON,
#else
  NULL,NULL,
#endif

  /* ---- double precision real (d*) ---- */
  0, 0, 0,
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
 DGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif

  damax_k_DUNNINGTON,  damin_k_DUNNINGTON,  dmax_k_DUNNINGTON,  dmin_k_DUNNINGTON,
  idamax_k_DUNNINGTON, idamin_k_DUNNINGTON, idmax_k_DUNNINGTON, idmin_k_DUNNINGTON,
  dnrm2_k_DUNNINGTON,  dasum_k_DUNNINGTON,  dcopy_k_DUNNINGTON, ddot_k_DUNNINGTON,
  drot_k_DUNNINGTON,   daxpy_k_DUNNINGTON,  dscal_k_DUNNINGTON, dswap_k_DUNNINGTON,
  dgemv_n_DUNNINGTON,  dgemv_t_DUNNINGTON,  dger_k_DUNNINGTON,
  dsymv_L_DUNNINGTON,  dsymv_U_DUNNINGTON,

  dgemm_kernel_DUNNINGTON, dgemm_beta_DUNNINGTON,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dgemm_incopy_DUNNINGTON, dgemm_itcopy_DUNNINGTON,
#else
  dgemm_oncopy_DUNNINGTON, dgemm_otcopy_DUNNINGTON,
#endif
  dgemm_oncopy_DUNNINGTON, dgemm_otcopy_DUNNINGTON,
  dtrsm_kernel_LN_DUNNINGTON, dtrsm_kernel_LT_DUNNINGTON, dtrsm_kernel_RN_DUNNINGTON, dtrsm_kernel_RT_DUNNINGTON,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrsm_iunucopy_DUNNINGTON, dtrsm_iunncopy_DUNNINGTON, dtrsm_iutucopy_DUNNINGTON, dtrsm_iutncopy_DUNNINGTON,
  dtrsm_ilnucopy_DUNNINGTON, dtrsm_ilnncopy_DUNNINGTON, dtrsm_iltucopy_DUNNINGTON, dtrsm_iltncopy_DUNNINGTON,
#else
  dtrsm_ounucopy_DUNNINGTON, dtrsm_ounncopy_DUNNINGTON, dtrsm_outucopy_DUNNINGTON, dtrsm_outncopy_DUNNINGTON,
  dtrsm_olnucopy_DUNNINGTON, dtrsm_olnncopy_DUNNINGTON, dtrsm_oltucopy_DUNNINGTON, dtrsm_oltncopy_DUNNINGTON,
#endif
  dtrsm_ounucopy_DUNNINGTON, dtrsm_ounncopy_DUNNINGTON, dtrsm_outucopy_DUNNINGTON, dtrsm_outncopy_DUNNINGTON,
  dtrsm_olnucopy_DUNNINGTON, dtrsm_olnncopy_DUNNINGTON, dtrsm_oltucopy_DUNNINGTON, dtrsm_oltncopy_DUNNINGTON,
  dtrmm_kernel_RN_DUNNINGTON, dtrmm_kernel_RT_DUNNINGTON, dtrmm_kernel_LN_DUNNINGTON, dtrmm_kernel_LT_DUNNINGTON,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrmm_iunucopy_DUNNINGTON, dtrmm_iunncopy_DUNNINGTON, dtrmm_iutucopy_DUNNINGTON, dtrmm_iutncopy_DUNNINGTON,
  dtrmm_ilnucopy_DUNNINGTON, dtrmm_ilnncopy_DUNNINGTON, dtrmm_iltucopy_DUNNINGTON, dtrmm_iltncopy_DUNNINGTON,
#else
  dtrmm_ounucopy_DUNNINGTON, dtrmm_ounncopy_DUNNINGTON, dtrmm_outucopy_DUNNINGTON, dtrmm_outncopy_DUNNINGTON,
  dtrmm_olnucopy_DUNNINGTON, dtrmm_olnncopy_DUNNINGTON, dtrmm_oltucopy_DUNNINGTON, dtrmm_oltncopy_DUNNINGTON,
#endif
  dtrmm_ounucopy_DUNNINGTON, dtrmm_ounncopy_DUNNINGTON, dtrmm_outucopy_DUNNINGTON, dtrmm_outncopy_DUNNINGTON,
  dtrmm_olnucopy_DUNNINGTON, dtrmm_olnncopy_DUNNINGTON, dtrmm_oltucopy_DUNNINGTON, dtrmm_oltncopy_DUNNINGTON,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dsymm_iutcopy_DUNNINGTON, dsymm_iltcopy_DUNNINGTON,
#else
  dsymm_outcopy_DUNNINGTON, dsymm_oltcopy_DUNNINGTON,
#endif
  dsymm_outcopy_DUNNINGTON, dsymm_oltcopy_DUNNINGTON,

#ifndef NO_LAPACK
  dneg_tcopy_DUNNINGTON, dlaswp_ncopy_DUNNINGTON,
#else
  NULL, NULL,
#endif

  /* ---- extended precision real (q*), only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  qamax_k_DUNNINGTON,  qamin_k_DUNNINGTON,  qmax_k_DUNNINGTON,  qmin_k_DUNNINGTON,
  iqamax_k_DUNNINGTON, iqamin_k_DUNNINGTON, iqmax_k_DUNNINGTON, iqmin_k_DUNNINGTON,
  qnrm2_k_DUNNINGTON,  qasum_k_DUNNINGTON,  qcopy_k_DUNNINGTON, qdot_k_DUNNINGTON,
  qrot_k_DUNNINGTON,   qaxpy_k_DUNNINGTON,  qscal_k_DUNNINGTON, qswap_k_DUNNINGTON,
  qgemv_n_DUNNINGTON,  qgemv_t_DUNNINGTON,  qger_k_DUNNINGTON,
  qsymv_L_DUNNINGTON,  qsymv_U_DUNNINGTON,

  qgemm_kernel_DUNNINGTON, qgemm_beta_DUNNINGTON,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qgemm_incopy_DUNNINGTON, qgemm_itcopy_DUNNINGTON,
#else
  qgemm_oncopy_DUNNINGTON, qgemm_otcopy_DUNNINGTON,
#endif
  qgemm_oncopy_DUNNINGTON, qgemm_otcopy_DUNNINGTON,
  qtrsm_kernel_LN_DUNNINGTON, qtrsm_kernel_LT_DUNNINGTON, qtrsm_kernel_RN_DUNNINGTON, qtrsm_kernel_RT_DUNNINGTON,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrsm_iunucopy_DUNNINGTON, qtrsm_iunncopy_DUNNINGTON, qtrsm_iutucopy_DUNNINGTON, qtrsm_iutncopy_DUNNINGTON,
  qtrsm_ilnucopy_DUNNINGTON, qtrsm_ilnncopy_DUNNINGTON, qtrsm_iltucopy_DUNNINGTON, qtrsm_iltncopy_DUNNINGTON,
#else
  qtrsm_ounucopy_DUNNINGTON, qtrsm_ounncopy_DUNNINGTON, qtrsm_outucopy_DUNNINGTON, qtrsm_outncopy_DUNNINGTON,
  qtrsm_olnucopy_DUNNINGTON, qtrsm_olnncopy_DUNNINGTON, qtrsm_oltucopy_DUNNINGTON, qtrsm_oltncopy_DUNNINGTON,
#endif
  qtrsm_ounucopy_DUNNINGTON, qtrsm_ounncopy_DUNNINGTON, qtrsm_outucopy_DUNNINGTON, qtrsm_outncopy_DUNNINGTON,
  qtrsm_olnucopy_DUNNINGTON, qtrsm_olnncopy_DUNNINGTON, qtrsm_oltucopy_DUNNINGTON, qtrsm_oltncopy_DUNNINGTON,
  qtrmm_kernel_RN_DUNNINGTON, qtrmm_kernel_RT_DUNNINGTON, qtrmm_kernel_LN_DUNNINGTON, qtrmm_kernel_LT_DUNNINGTON,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrmm_iunucopy_DUNNINGTON, qtrmm_iunncopy_DUNNINGTON, qtrmm_iutucopy_DUNNINGTON, qtrmm_iutncopy_DUNNINGTON,
  qtrmm_ilnucopy_DUNNINGTON, qtrmm_ilnncopy_DUNNINGTON, qtrmm_iltucopy_DUNNINGTON, qtrmm_iltncopy_DUNNINGTON,
#else
  qtrmm_ounucopy_DUNNINGTON, qtrmm_ounncopy_DUNNINGTON, qtrmm_outucopy_DUNNINGTON, qtrmm_outncopy_DUNNINGTON,
  qtrmm_olnucopy_DUNNINGTON, qtrmm_olnncopy_DUNNINGTON, qtrmm_oltucopy_DUNNINGTON, qtrmm_oltncopy_DUNNINGTON,
#endif
  qtrmm_ounucopy_DUNNINGTON, qtrmm_ounncopy_DUNNINGTON, qtrmm_outucopy_DUNNINGTON, qtrmm_outncopy_DUNNINGTON,
  qtrmm_olnucopy_DUNNINGTON, qtrmm_olnncopy_DUNNINGTON, qtrmm_oltucopy_DUNNINGTON, qtrmm_oltncopy_DUNNINGTON,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qsymm_iutcopy_DUNNINGTON, qsymm_iltcopy_DUNNINGTON,
#else
  qsymm_outcopy_DUNNINGTON, qsymm_oltcopy_DUNNINGTON,
#endif
  qsymm_outcopy_DUNNINGTON, qsymm_oltcopy_DUNNINGTON,

#ifndef NO_LAPACK
  qneg_tcopy_DUNNINGTON, qlaswp_ncopy_DUNNINGTON,
#else
  NULL, NULL,
#endif

#endif

  /* ---- single precision complex (c*) ---- */
  0, 0, 0,
  CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
 CGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif

  camax_k_DUNNINGTON, camin_k_DUNNINGTON, icamax_k_DUNNINGTON, icamin_k_DUNNINGTON,
  cnrm2_k_DUNNINGTON, casum_k_DUNNINGTON, ccopy_k_DUNNINGTON,
  cdotu_k_DUNNINGTON, cdotc_k_DUNNINGTON, csrot_k_DUNNINGTON,
  caxpy_k_DUNNINGTON, caxpyc_k_DUNNINGTON, cscal_k_DUNNINGTON, cswap_k_DUNNINGTON,

  cgemv_n_DUNNINGTON, cgemv_t_DUNNINGTON, cgemv_r_DUNNINGTON, cgemv_c_DUNNINGTON,
  cgemv_o_DUNNINGTON, cgemv_u_DUNNINGTON, cgemv_s_DUNNINGTON, cgemv_d_DUNNINGTON,
  cgeru_k_DUNNINGTON, cgerc_k_DUNNINGTON, cgerv_k_DUNNINGTON, cgerd_k_DUNNINGTON,
  csymv_L_DUNNINGTON, csymv_U_DUNNINGTON,
  chemv_L_DUNNINGTON, chemv_U_DUNNINGTON, chemv_M_DUNNINGTON, chemv_V_DUNNINGTON,

  cgemm_kernel_n_DUNNINGTON, cgemm_kernel_l_DUNNINGTON, cgemm_kernel_r_DUNNINGTON, cgemm_kernel_b_DUNNINGTON,
  cgemm_beta_DUNNINGTON,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  cgemm_incopy_DUNNINGTON, cgemm_itcopy_DUNNINGTON,
#else
  cgemm_oncopy_DUNNINGTON, cgemm_otcopy_DUNNINGTON,
#endif
  cgemm_oncopy_DUNNINGTON, cgemm_otcopy_DUNNINGTON,

  ctrsm_kernel_LN_DUNNINGTON, ctrsm_kernel_LT_DUNNINGTON, ctrsm_kernel_LR_DUNNINGTON, ctrsm_kernel_LC_DUNNINGTON,
  ctrsm_kernel_RN_DUNNINGTON, ctrsm_kernel_RT_DUNNINGTON, ctrsm_kernel_RR_DUNNINGTON, ctrsm_kernel_RC_DUNNINGTON,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrsm_iunucopy_DUNNINGTON,  ctrsm_iunncopy_DUNNINGTON,  ctrsm_iutucopy_DUNNINGTON,  ctrsm_iutncopy_DUNNINGTON,
  ctrsm_ilnucopy_DUNNINGTON,  ctrsm_ilnncopy_DUNNINGTON,  ctrsm_iltucopy_DUNNINGTON,  ctrsm_iltncopy_DUNNINGTON,
#else
  ctrsm_ounucopy_DUNNINGTON,  ctrsm_ounncopy_DUNNINGTON,  ctrsm_outucopy_DUNNINGTON,  ctrsm_outncopy_DUNNINGTON,
  ctrsm_olnucopy_DUNNINGTON,  ctrsm_olnncopy_DUNNINGTON,  ctrsm_oltucopy_DUNNINGTON,  ctrsm_oltncopy_DUNNINGTON,
#endif
  ctrsm_ounucopy_DUNNINGTON,  ctrsm_ounncopy_DUNNINGTON,  ctrsm_outucopy_DUNNINGTON,  ctrsm_outncopy_DUNNINGTON,
  ctrsm_olnucopy_DUNNINGTON,  ctrsm_olnncopy_DUNNINGTON,  ctrsm_oltucopy_DUNNINGTON,  ctrsm_oltncopy_DUNNINGTON,

  ctrmm_kernel_RN_DUNNINGTON,  ctrmm_kernel_RT_DUNNINGTON,  ctrmm_kernel_RR_DUNNINGTON,  ctrmm_kernel_RC_DUNNINGTON,
  ctrmm_kernel_LN_DUNNINGTON,  ctrmm_kernel_LT_DUNNINGTON,  ctrmm_kernel_LR_DUNNINGTON,  ctrmm_kernel_LC_DUNNINGTON,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrmm_iunucopy_DUNNINGTON,  ctrmm_iunncopy_DUNNINGTON,  ctrmm_iutucopy_DUNNINGTON,  ctrmm_iutncopy_DUNNINGTON,
  ctrmm_ilnucopy_DUNNINGTON,  ctrmm_ilnncopy_DUNNINGTON,  ctrmm_iltucopy_DUNNINGTON,  ctrmm_iltncopy_DUNNINGTON,
#else
  ctrmm_ounucopy_DUNNINGTON,  ctrmm_ounncopy_DUNNINGTON,  ctrmm_outucopy_DUNNINGTON,  ctrmm_outncopy_DUNNINGTON,
  ctrmm_olnucopy_DUNNINGTON,  ctrmm_olnncopy_DUNNINGTON,  ctrmm_oltucopy_DUNNINGTON,  ctrmm_oltncopy_DUNNINGTON,
#endif
  ctrmm_ounucopy_DUNNINGTON,  ctrmm_ounncopy_DUNNINGTON,  ctrmm_outucopy_DUNNINGTON,  ctrmm_outncopy_DUNNINGTON,
  ctrmm_olnucopy_DUNNINGTON,  ctrmm_olnncopy_DUNNINGTON,  ctrmm_oltucopy_DUNNINGTON,  ctrmm_oltncopy_DUNNINGTON,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  csymm_iutcopy_DUNNINGTON,  csymm_iltcopy_DUNNINGTON,
#else
  csymm_outcopy_DUNNINGTON,  csymm_oltcopy_DUNNINGTON,
#endif
  csymm_outcopy_DUNNINGTON,  csymm_oltcopy_DUNNINGTON,
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  chemm_iutcopy_DUNNINGTON,  chemm_iltcopy_DUNNINGTON,
#else
  chemm_outcopy_DUNNINGTON,  chemm_oltcopy_DUNNINGTON,
#endif
  chemm_outcopy_DUNNINGTON,  chemm_oltcopy_DUNNINGTON,

  /* ---- cgemm3m: falls back to the SGEMM unroll factors when
          CGEMM3M_DEFAULT_UNROLL_M is not defined ---- */
  0, 0, 0,
#ifdef CGEMM3M_DEFAULT_UNROLL_M
  CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  cgemm3m_kernel_DUNNINGTON,

  cgemm3m_incopyb_DUNNINGTON,  cgemm3m_incopyr_DUNNINGTON,
  cgemm3m_incopyi_DUNNINGTON,  cgemm3m_itcopyb_DUNNINGTON,
  cgemm3m_itcopyr_DUNNINGTON,  cgemm3m_itcopyi_DUNNINGTON,
  cgemm3m_oncopyb_DUNNINGTON,  cgemm3m_oncopyr_DUNNINGTON,
  cgemm3m_oncopyi_DUNNINGTON,  cgemm3m_otcopyb_DUNNINGTON,
  cgemm3m_otcopyr_DUNNINGTON,  cgemm3m_otcopyi_DUNNINGTON,

  csymm3m_iucopyb_DUNNINGTON,  csymm3m_ilcopyb_DUNNINGTON,
  csymm3m_iucopyr_DUNNINGTON,  csymm3m_ilcopyr_DUNNINGTON,
  csymm3m_iucopyi_DUNNINGTON,  csymm3m_ilcopyi_DUNNINGTON,
  csymm3m_oucopyb_DUNNINGTON,  csymm3m_olcopyb_DUNNINGTON,
  csymm3m_oucopyr_DUNNINGTON,  csymm3m_olcopyr_DUNNINGTON,
  csymm3m_oucopyi_DUNNINGTON,  csymm3m_olcopyi_DUNNINGTON,

  chemm3m_iucopyb_DUNNINGTON,  chemm3m_ilcopyb_DUNNINGTON,
  chemm3m_iucopyr_DUNNINGTON,  chemm3m_ilcopyr_DUNNINGTON,
  chemm3m_iucopyi_DUNNINGTON,  chemm3m_ilcopyi_DUNNINGTON,

  chemm3m_oucopyb_DUNNINGTON,  chemm3m_olcopyb_DUNNINGTON,
  chemm3m_oucopyr_DUNNINGTON,  chemm3m_olcopyr_DUNNINGTON,
  chemm3m_oucopyi_DUNNINGTON,  chemm3m_olcopyi_DUNNINGTON,

#ifndef NO_LAPACK
  cneg_tcopy_DUNNINGTON, claswp_ncopy_DUNNINGTON,
#else
  NULL, NULL,
#endif

  /* ---- double precision complex (z*) ---- */
  0, 0, 0,
  ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN
 ZGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
#endif

  zamax_k_DUNNINGTON, zamin_k_DUNNINGTON, izamax_k_DUNNINGTON, izamin_k_DUNNINGTON,
  znrm2_k_DUNNINGTON, zasum_k_DUNNINGTON, zcopy_k_DUNNINGTON,
  zdotu_k_DUNNINGTON, zdotc_k_DUNNINGTON, zdrot_k_DUNNINGTON,
  zaxpy_k_DUNNINGTON, zaxpyc_k_DUNNINGTON, zscal_k_DUNNINGTON, zswap_k_DUNNINGTON,

  zgemv_n_DUNNINGTON, zgemv_t_DUNNINGTON, zgemv_r_DUNNINGTON, zgemv_c_DUNNINGTON,
  zgemv_o_DUNNINGTON, zgemv_u_DUNNINGTON, zgemv_s_DUNNINGTON, zgemv_d_DUNNINGTON,
  zgeru_k_DUNNINGTON, zgerc_k_DUNNINGTON, zgerv_k_DUNNINGTON, zgerd_k_DUNNINGTON,
  zsymv_L_DUNNINGTON, zsymv_U_DUNNINGTON,
  zhemv_L_DUNNINGTON, zhemv_U_DUNNINGTON, zhemv_M_DUNNINGTON, zhemv_V_DUNNINGTON,

  zgemm_kernel_n_DUNNINGTON, zgemm_kernel_l_DUNNINGTON, zgemm_kernel_r_DUNNINGTON, zgemm_kernel_b_DUNNINGTON,
  zgemm_beta_DUNNINGTON,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zgemm_incopy_DUNNINGTON, zgemm_itcopy_DUNNINGTON,
#else
  zgemm_oncopy_DUNNINGTON, zgemm_otcopy_DUNNINGTON,
#endif
  zgemm_oncopy_DUNNINGTON, zgemm_otcopy_DUNNINGTON,

  ztrsm_kernel_LN_DUNNINGTON, ztrsm_kernel_LT_DUNNINGTON, ztrsm_kernel_LR_DUNNINGTON, ztrsm_kernel_LC_DUNNINGTON,
  ztrsm_kernel_RN_DUNNINGTON, ztrsm_kernel_RT_DUNNINGTON, ztrsm_kernel_RR_DUNNINGTON, ztrsm_kernel_RC_DUNNINGTON,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrsm_iunucopy_DUNNINGTON,  ztrsm_iunncopy_DUNNINGTON,  ztrsm_iutucopy_DUNNINGTON,  ztrsm_iutncopy_DUNNINGTON,
  ztrsm_ilnucopy_DUNNINGTON,  ztrsm_ilnncopy_DUNNINGTON,  ztrsm_iltucopy_DUNNINGTON,  ztrsm_iltncopy_DUNNINGTON,
#else
  ztrsm_ounucopy_DUNNINGTON,  ztrsm_ounncopy_DUNNINGTON,  ztrsm_outucopy_DUNNINGTON,  ztrsm_outncopy_DUNNINGTON,
  ztrsm_olnucopy_DUNNINGTON,  ztrsm_olnncopy_DUNNINGTON,  ztrsm_oltucopy_DUNNINGTON,  ztrsm_oltncopy_DUNNINGTON,
#endif
  ztrsm_ounucopy_DUNNINGTON,  ztrsm_ounncopy_DUNNINGTON,  ztrsm_outucopy_DUNNINGTON,  ztrsm_outncopy_DUNNINGTON,
  ztrsm_olnucopy_DUNNINGTON,  ztrsm_olnncopy_DUNNINGTON,  ztrsm_oltucopy_DUNNINGTON,  ztrsm_oltncopy_DUNNINGTON,

  ztrmm_kernel_RN_DUNNINGTON,  ztrmm_kernel_RT_DUNNINGTON,  ztrmm_kernel_RR_DUNNINGTON,  ztrmm_kernel_RC_DUNNINGTON,
  ztrmm_kernel_LN_DUNNINGTON,  ztrmm_kernel_LT_DUNNINGTON,  ztrmm_kernel_LR_DUNNINGTON,  ztrmm_kernel_LC_DUNNINGTON,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrmm_iunucopy_DUNNINGTON,  ztrmm_iunncopy_DUNNINGTON,  ztrmm_iutucopy_DUNNINGTON,  ztrmm_iutncopy_DUNNINGTON,
  ztrmm_ilnucopy_DUNNINGTON,  ztrmm_ilnncopy_DUNNINGTON,  ztrmm_iltucopy_DUNNINGTON,  ztrmm_iltncopy_DUNNINGTON,
#else
  ztrmm_ounucopy_DUNNINGTON,  ztrmm_ounncopy_DUNNINGTON,  ztrmm_outucopy_DUNNINGTON,  ztrmm_outncopy_DUNNINGTON,
  ztrmm_olnucopy_DUNNINGTON,  ztrmm_olnncopy_DUNNINGTON,  ztrmm_oltucopy_DUNNINGTON,  ztrmm_oltncopy_DUNNINGTON,
#endif
  ztrmm_ounucopy_DUNNINGTON,  ztrmm_ounncopy_DUNNINGTON,  ztrmm_outucopy_DUNNINGTON,  ztrmm_outncopy_DUNNINGTON,
  ztrmm_olnucopy_DUNNINGTON,  ztrmm_olnncopy_DUNNINGTON,  ztrmm_oltucopy_DUNNINGTON,  ztrmm_oltncopy_DUNNINGTON,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zsymm_iutcopy_DUNNINGTON,  zsymm_iltcopy_DUNNINGTON,
#else
  zsymm_outcopy_DUNNINGTON,  zsymm_oltcopy_DUNNINGTON,
#endif
  zsymm_outcopy_DUNNINGTON,  zsymm_oltcopy_DUNNINGTON,
#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zhemm_iutcopy_DUNNINGTON,  zhemm_iltcopy_DUNNINGTON,
#else
  zhemm_outcopy_DUNNINGTON,  zhemm_oltcopy_DUNNINGTON,
#endif
  zhemm_outcopy_DUNNINGTON,  zhemm_oltcopy_DUNNINGTON,

  /* ---- zgemm3m: falls back to the DGEMM unroll factors when
          ZGEMM3M_DEFAULT_UNROLL_M is not defined ---- */
  0, 0, 0,
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif


  zgemm3m_kernel_DUNNINGTON,

  zgemm3m_incopyb_DUNNINGTON,  zgemm3m_incopyr_DUNNINGTON,
  zgemm3m_incopyi_DUNNINGTON,  zgemm3m_itcopyb_DUNNINGTON,
  zgemm3m_itcopyr_DUNNINGTON,  zgemm3m_itcopyi_DUNNINGTON,
  zgemm3m_oncopyb_DUNNINGTON,  zgemm3m_oncopyr_DUNNINGTON,
  zgemm3m_oncopyi_DUNNINGTON,  zgemm3m_otcopyb_DUNNINGTON,
  zgemm3m_otcopyr_DUNNINGTON,  zgemm3m_otcopyi_DUNNINGTON,

  zsymm3m_iucopyb_DUNNINGTON,  zsymm3m_ilcopyb_DUNNINGTON,
  zsymm3m_iucopyr_DUNNINGTON,  zsymm3m_ilcopyr_DUNNINGTON,
  zsymm3m_iucopyi_DUNNINGTON,  zsymm3m_ilcopyi_DUNNINGTON,
  zsymm3m_oucopyb_DUNNINGTON,  zsymm3m_olcopyb_DUNNINGTON,
  zsymm3m_oucopyr_DUNNINGTON,  zsymm3m_olcopyr_DUNNINGTON,
  zsymm3m_oucopyi_DUNNINGTON,  zsymm3m_olcopyi_DUNNINGTON,

  zhemm3m_iucopyb_DUNNINGTON,  zhemm3m_ilcopyb_DUNNINGTON,
  zhemm3m_iucopyr_DUNNINGTON,  zhemm3m_ilcopyr_DUNNINGTON,
  zhemm3m_iucopyi_DUNNINGTON,  zhemm3m_ilcopyi_DUNNINGTON,

  zhemm3m_oucopyb_DUNNINGTON,  zhemm3m_olcopyb_DUNNINGTON,
  zhemm3m_oucopyr_DUNNINGTON,  zhemm3m_olcopyr_DUNNINGTON,
  zhemm3m_oucopyi_DUNNINGTON,  zhemm3m_olcopyi_DUNNINGTON,

#ifndef NO_LAPACK
  zneg_tcopy_DUNNINGTON, zlaswp_ncopy_DUNNINGTON,
#else
  NULL, NULL,
#endif

  /* ---- extended precision complex (x*), only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),

  xamax_k_DUNNINGTON, xamin_k_DUNNINGTON, ixamax_k_DUNNINGTON, ixamin_k_DUNNINGTON,
  xnrm2_k_DUNNINGTON, xasum_k_DUNNINGTON, xcopy_k_DUNNINGTON,
  xdotu_k_DUNNINGTON, xdotc_k_DUNNINGTON, xqrot_k_DUNNINGTON,
  xaxpy_k_DUNNINGTON, xaxpyc_k_DUNNINGTON, xscal_k_DUNNINGTON, xswap_k_DUNNINGTON,

  xgemv_n_DUNNINGTON, xgemv_t_DUNNINGTON, xgemv_r_DUNNINGTON, xgemv_c_DUNNINGTON,
  xgemv_o_DUNNINGTON, xgemv_u_DUNNINGTON, xgemv_s_DUNNINGTON, xgemv_d_DUNNINGTON,
  xgeru_k_DUNNINGTON, xgerc_k_DUNNINGTON, xgerv_k_DUNNINGTON, xgerd_k_DUNNINGTON,
  xsymv_L_DUNNINGTON, xsymv_U_DUNNINGTON,
  xhemv_L_DUNNINGTON, xhemv_U_DUNNINGTON, xhemv_M_DUNNINGTON, xhemv_V_DUNNINGTON,

  xgemm_kernel_n_DUNNINGTON, xgemm_kernel_l_DUNNINGTON, xgemm_kernel_r_DUNNINGTON, xgemm_kernel_b_DUNNINGTON,
  xgemm_beta_DUNNINGTON,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xgemm_incopy_DUNNINGTON, xgemm_itcopy_DUNNINGTON,
#else
  xgemm_oncopy_DUNNINGTON, xgemm_otcopy_DUNNINGTON,
#endif
  xgemm_oncopy_DUNNINGTON, xgemm_otcopy_DUNNINGTON,

  xtrsm_kernel_LN_DUNNINGTON, xtrsm_kernel_LT_DUNNINGTON, xtrsm_kernel_LR_DUNNINGTON, xtrsm_kernel_LC_DUNNINGTON,
  xtrsm_kernel_RN_DUNNINGTON, xtrsm_kernel_RT_DUNNINGTON, xtrsm_kernel_RR_DUNNINGTON, xtrsm_kernel_RC_DUNNINGTON,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrsm_iunucopy_DUNNINGTON,  xtrsm_iunncopy_DUNNINGTON,  xtrsm_iutucopy_DUNNINGTON,  xtrsm_iutncopy_DUNNINGTON,
  xtrsm_ilnucopy_DUNNINGTON,  xtrsm_ilnncopy_DUNNINGTON,  xtrsm_iltucopy_DUNNINGTON,  xtrsm_iltncopy_DUNNINGTON,
#else
  xtrsm_ounucopy_DUNNINGTON,  xtrsm_ounncopy_DUNNINGTON,  xtrsm_outucopy_DUNNINGTON,  xtrsm_outncopy_DUNNINGTON,
  xtrsm_olnucopy_DUNNINGTON,  xtrsm_olnncopy_DUNNINGTON,  xtrsm_oltucopy_DUNNINGTON,  xtrsm_oltncopy_DUNNINGTON,
#endif
  xtrsm_ounucopy_DUNNINGTON,  xtrsm_ounncopy_DUNNINGTON,  xtrsm_outucopy_DUNNINGTON,  xtrsm_outncopy_DUNNINGTON,
  xtrsm_olnucopy_DUNNINGTON,  xtrsm_olnncopy_DUNNINGTON,  xtrsm_oltucopy_DUNNINGTON,  xtrsm_oltncopy_DUNNINGTON,

  xtrmm_kernel_RN_DUNNINGTON,  xtrmm_kernel_RT_DUNNINGTON,  xtrmm_kernel_RR_DUNNINGTON,  xtrmm_kernel_RC_DUNNINGTON,
  xtrmm_kernel_LN_DUNNINGTON,  xtrmm_kernel_LT_DUNNINGTON,  xtrmm_kernel_LR_DUNNINGTON,  xtrmm_kernel_LC_DUNNINGTON,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrmm_iunucopy_DUNNINGTON,  xtrmm_iunncopy_DUNNINGTON,  xtrmm_iutucopy_DUNNINGTON,  xtrmm_iutncopy_DUNNINGTON,
  xtrmm_ilnucopy_DUNNINGTON,  xtrmm_ilnncopy_DUNNINGTON,  xtrmm_iltucopy_DUNNINGTON,  xtrmm_iltncopy_DUNNINGTON,
#else
  xtrmm_ounucopy_DUNNINGTON,  xtrmm_ounncopy_DUNNINGTON,  xtrmm_outucopy_DUNNINGTON,  xtrmm_outncopy_DUNNINGTON,
  xtrmm_olnucopy_DUNNINGTON,  xtrmm_olnncopy_DUNNINGTON,  xtrmm_oltucopy_DUNNINGTON,  xtrmm_oltncopy_DUNNINGTON,
#endif
  xtrmm_ounucopy_DUNNINGTON,  xtrmm_ounncopy_DUNNINGTON,  xtrmm_outucopy_DUNNINGTON,  xtrmm_outncopy_DUNNINGTON,
  xtrmm_olnucopy_DUNNINGTON,  xtrmm_olnncopy_DUNNINGTON,  xtrmm_oltucopy_DUNNINGTON,  xtrmm_oltncopy_DUNNINGTON,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xsymm_iutcopy_DUNNINGTON,  xsymm_iltcopy_DUNNINGTON,
#else
  xsymm_outcopy_DUNNINGTON,  xsymm_oltcopy_DUNNINGTON,
#endif
  xsymm_outcopy_DUNNINGTON,  xsymm_oltcopy_DUNNINGTON,
#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xhemm_iutcopy_DUNNINGTON,  xhemm_iltcopy_DUNNINGTON,
#else
  xhemm_outcopy_DUNNINGTON,  xhemm_oltcopy_DUNNINGTON,
#endif
  xhemm_outcopy_DUNNINGTON,  xhemm_oltcopy_DUNNINGTON,

  /* ---- xgemm3m: uses the QGEMM unroll factors ---- */
  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  xgemm3m_kernel_DUNNINGTON,

  xgemm3m_incopyb_DUNNINGTON,  xgemm3m_incopyr_DUNNINGTON,
  xgemm3m_incopyi_DUNNINGTON,  xgemm3m_itcopyb_DUNNINGTON,
  xgemm3m_itcopyr_DUNNINGTON,  xgemm3m_itcopyi_DUNNINGTON,
  xgemm3m_oncopyb_DUNNINGTON,  xgemm3m_oncopyr_DUNNINGTON,
  xgemm3m_oncopyi_DUNNINGTON,  xgemm3m_otcopyb_DUNNINGTON,
  xgemm3m_otcopyr_DUNNINGTON,  xgemm3m_otcopyi_DUNNINGTON,

  xsymm3m_iucopyb_DUNNINGTON,  xsymm3m_ilcopyb_DUNNINGTON,
  xsymm3m_iucopyr_DUNNINGTON,  xsymm3m_ilcopyr_DUNNINGTON,
  xsymm3m_iucopyi_DUNNINGTON,  xsymm3m_ilcopyi_DUNNINGTON,
  xsymm3m_oucopyb_DUNNINGTON,  xsymm3m_olcopyb_DUNNINGTON,
  xsymm3m_oucopyr_DUNNINGTON,  xsymm3m_olcopyr_DUNNINGTON,
  xsymm3m_oucopyi_DUNNINGTON,  xsymm3m_olcopyi_DUNNINGTON,

  xhemm3m_iucopyb_DUNNINGTON,  xhemm3m_ilcopyb_DUNNINGTON,
  xhemm3m_iucopyr_DUNNINGTON,  xhemm3m_ilcopyr_DUNNINGTON,
  xhemm3m_iucopyi_DUNNINGTON,  xhemm3m_ilcopyi_DUNNINGTON,

  xhemm3m_oucopyb_DUNNINGTON,  xhemm3m_olcopyb_DUNNINGTON,
  xhemm3m_oucopyr_DUNNINGTON,  xhemm3m_olcopyr_DUNNINGTON,
  xhemm3m_oucopyi_DUNNINGTON,  xhemm3m_olcopyi_DUNNINGTON,

#ifndef NO_LAPACK
  xneg_tcopy_DUNNINGTON, xlaswp_ncopy_DUNNINGTON,
#else
  NULL, NULL,
#endif

#endif

  /* Pointer to the parameter-setup routine defined later in this file. */
  init_parameter,

  SNUMOPT, DNUMOPT, QNUMOPT,

  /* ---- axpby kernels (s, d, c, z) ---- */
  saxpby_k_DUNNINGTON, daxpby_k_DUNNINGTON, caxpby_k_DUNNINGTON, zaxpby_k_DUNNINGTON,

  /* ---- omatcopy kernels; the complex flavours add cnc/ctc/rnc/rtc variants ---- */
  somatcopy_k_cn_DUNNINGTON, somatcopy_k_ct_DUNNINGTON, somatcopy_k_rn_DUNNINGTON, somatcopy_k_rt_DUNNINGTON,
  domatcopy_k_cn_DUNNINGTON, domatcopy_k_ct_DUNNINGTON, domatcopy_k_rn_DUNNINGTON, domatcopy_k_rt_DUNNINGTON,
  comatcopy_k_cn_DUNNINGTON, comatcopy_k_ct_DUNNINGTON, comatcopy_k_rn_DUNNINGTON, comatcopy_k_rt_DUNNINGTON,
  comatcopy_k_cnc_DUNNINGTON, comatcopy_k_ctc_DUNNINGTON, comatcopy_k_rnc_DUNNINGTON, comatcopy_k_rtc_DUNNINGTON,
  zomatcopy_k_cn_DUNNINGTON, zomatcopy_k_ct_DUNNINGTON, zomatcopy_k_rn_DUNNINGTON, zomatcopy_k_rt_DUNNINGTON,
  zomatcopy_k_cnc_DUNNINGTON, zomatcopy_k_ctc_DUNNINGTON, zomatcopy_k_rnc_DUNNINGTON, zomatcopy_k_rtc_DUNNINGTON,

  /* ---- imatcopy kernels, same layout as the omatcopy group ---- */
  simatcopy_k_cn_DUNNINGTON, simatcopy_k_ct_DUNNINGTON, simatcopy_k_rn_DUNNINGTON, simatcopy_k_rt_DUNNINGTON,
  dimatcopy_k_cn_DUNNINGTON, dimatcopy_k_ct_DUNNINGTON, dimatcopy_k_rn_DUNNINGTON, dimatcopy_k_rt_DUNNINGTON,
  cimatcopy_k_cn_DUNNINGTON, cimatcopy_k_ct_DUNNINGTON, cimatcopy_k_rn_DUNNINGTON, cimatcopy_k_rt_DUNNINGTON,
  cimatcopy_k_cnc_DUNNINGTON, cimatcopy_k_ctc_DUNNINGTON, cimatcopy_k_rnc_DUNNINGTON, cimatcopy_k_rtc_DUNNINGTON,
  zimatcopy_k_cn_DUNNINGTON, zimatcopy_k_ct_DUNNINGTON, zimatcopy_k_rn_DUNNINGTON, zimatcopy_k_rt_DUNNINGTON,
  zimatcopy_k_cnc_DUNNINGTON, zimatcopy_k_ctc_DUNNINGTON, zimatcopy_k_rnc_DUNNINGTON, zimatcopy_k_rtc_DUNNINGTON,

  /* ---- geadd kernels (s, d, c, z) ---- */
  sgeadd_k_DUNNINGTON, dgeadd_k_DUNNINGTON, cgeadd_k_DUNNINGTON, zgeadd_k_DUNNINGTON

};

#ifdef ARCH_X86
/*
 * Fallback L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
 *
 * Executes cpuid(2) once and splits EAX..EDX into 15 one-byte values: every
 * byte except the low byte of EAX (Intel documents that byte as an iteration
 * count rather than a cache descriptor).  The bytes are scanned in register
 * order and the size associated with the first recognised L2 descriptor is
 * returned.
 *
 * Returns the L2 size in KB as given by the descriptor table, or 0 when none
 * of the bytes is a known L2 descriptor.
 */
static int get_l2_size_old(void){
  int i, eax, ebx, ecx, edx;
  int info[15];

  cpuid(2, &eax, &ebx, &ecx, &edx);

  /* EAX: bits 7..0 are deliberately skipped (not a descriptor). */
  info[ 0] = BITMASK(eax,  8, 0xff);
  info[ 1] = BITMASK(eax, 16, 0xff);
  info[ 2] = BITMASK(eax, 24, 0xff);

  info[ 3] = BITMASK(ebx,  0, 0xff);
  info[ 4] = BITMASK(ebx,  8, 0xff);
  info[ 5] = BITMASK(ebx, 16, 0xff);
  info[ 6] = BITMASK(ebx, 24, 0xff);

  info[ 7] = BITMASK(ecx,  0, 0xff);
  info[ 8] = BITMASK(ecx,  8, 0xff);
  info[ 9] = BITMASK(ecx, 16, 0xff);
  info[10] = BITMASK(ecx, 24, 0xff);

  info[11] = BITMASK(edx,  0, 0xff);
  info[12] = BITMASK(edx,  8, 0xff);
  info[13] = BITMASK(edx, 16, 0xff);
  info[14] = BITMASK(edx, 24, 0xff);

  for (i = 0; i < 15; i++){

    switch (info[i]){

      /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

    case 0x1a :
      return 96;

    case 0x39 :
    case 0x3b :
    case 0x41 :
    case 0x79 :
    case 0x81 :
      return 128;

    case 0x3a :
      return 192;

    case 0x21 :
    case 0x3c :
    case 0x42 :
    case 0x7a :
    case 0x7e :
    case 0x82 :
      return 256;

    case 0x3d :
      return 384;

    case 0x3e :
    case 0x43 :
    case 0x7b :
    case 0x7f :
    case 0x83 :
    case 0x86 :
      return 512;

    case 0x44 :
    case 0x78 :
    case 0x7c :
    case 0x84 :
    case 0x87 :
      return 1024;

    case 0x45 :
    case 0x7d :
    case 0x85 :
      return 2048;

    case 0x48 :
      /* 3 MB L2 = 3 * 1024 KB (previously mistyped as 3184). */
      return 3072;

    case 0x49 :
      return 4096;

    case 0x4e :
      return 6144;

    default :
      /* Not an L2 descriptor we know about; keep scanning. */
      break;
    }
  }
  return 0;
}
#endif

/*
 * Read the L2 size from extended CPUID leaf 0x80000006 (the 16-bit field of
 * ECX starting at bit 16).  On ARCH_X86 builds a non-positive reading falls
 * back to the descriptor-based get_l2_size_old(); on other builds the raw
 * field value is returned unchanged.
 */
static __inline__ int get_l2_size(void){

  int eax, ebx, ecx, edx;
  int size;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  size = BITMASK(ecx, 16, 0xffff);

#ifdef ARCH_X86
  /* Leaf 0x80000006 reported nothing usable: try the legacy lookup. */
  if (size <= 0) size = get_l2_size_old();
#endif

  return size;
}

/*
 * Read the L3 size from extended CPUID leaf 0x80000006: the 14-bit field of
 * EDX starting at bit 18, multiplied by 512.
 */
static __inline__ int get_l3_size(void){

  int eax, ebx, ecx, edx;
  int units;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  units = BITMASK(edx, 18, 0x3fff);

  return units * 512;
}


/* Helpers local to init_parameter(); both are #undef'd right after it. */

/* Rounds v up to a multiple of u using the add-and-mask form.  This is only
   a true round-up when u is a power of two. */
#define INIT_PARAM_ROUND_UP(v, u) (((v) + (u) - 1) & ~((u) - 1))

/* Computes an R value from P, Q and a per-element byte count:
     - take the footprint of a P x Q panel (p * q * bytes) plus
       TABLE_NAME.offsetA, rounded with the TABLE_NAME.align mask,
     - subtract it from BUFFER_SIZE,
     - divide what is left by the bytes in one run of Q elements,
     - subtract 15 and clear the low four bits (result is a multiple of 16).
   The expression is kept token-for-token equivalent to the hand-expanded
   formulas it replaces so that integer types and truncation do not change. */
#define INIT_PARAM_GEMM_R(p, q, bytes)					\
  (((BUFFER_SIZE -							\
     (((p) * (q) * (bytes) + TABLE_NAME.offsetA + TABLE_NAME.align)	\
      & ~TABLE_NAME.align)						\
     ) / ((q) * (bytes)) - 15) & ~15)

/* Fills in the P/Q/R GEMM parameters of TABLE_NAME for the CPU core this
   translation unit is compiled for.

   Steps, in order:
     1. Q values are copied from the *_DEFAULT_Q compile-time constants.
     2. P values are either derived from the L2 size reported by
        get_l2_size() (older cores) or copied from *_DEFAULT_P.
     3. The 3M variants inherit P from the matching real-precision entry
        unless a dedicated *3M_DEFAULT_P exists.
     4. Every P is rounded up to its UNROLL_M.
     5. R values are derived from BUFFER_SIZE, P and Q.

   Takes no arguments and returns nothing; the only effect is on TABLE_NAME
   (plus diagnostics on stderr when DEBUG is defined).

   NOTE(review): for the cores that scale P by (l2 >> 7) without an additive
   term, an L2 value below 128 yields P == 0 -- confirm get_l2_size() cannot
   return such a value on those parts. */
static void init_parameter(void) {

  int l2 = get_l2_size();

  /* ---- Q: always the compile-time defaults --------------------------- */
  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;

  /* The 3M variants fall back to the real-precision Q of the same width. */
#ifdef CGEMM3M_DEFAULT_Q
  TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif

#ifdef ZGEMM3M_DEFAULT_Q
  TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif

#ifdef EXPRECISION
  TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif

  /* ---- P: per-core selection ----------------------------------------- */

#if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)

#ifdef DEBUG
  fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif

  /* P scales linearly with L2 in steps of 128 units. */
  TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
#endif
#endif

#ifdef CORE_NORTHWOOD

#ifdef DEBUG
  fprintf(stderr, "Northwood\n");
#endif

  TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
#endif
#endif

#ifdef ATOM

#ifdef DEBUG
  fprintf(stderr, "Atom\n");
#endif

  /* Fixed values; the detected L2 size is not consulted here. */
  TABLE_NAME.sgemm_p = 256;
  TABLE_NAME.dgemm_p = 128;
  TABLE_NAME.cgemm_p = 128;
  TABLE_NAME.zgemm_p =  64;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  64;
  TABLE_NAME.xgemm_p =  32;
#endif
#endif

#ifdef CORE_PRESCOTT

#ifdef DEBUG
  fprintf(stderr, "Prescott\n");
#endif

  TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
#endif
#endif

#ifdef CORE2

#ifdef DEBUG
  fprintf(stderr, "Core2\n");
#endif

  /* Steps of 512 L2 units plus a small constant, so P stays non-zero. */
  TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4;
#endif
#endif

#ifdef PENRYN

#ifdef DEBUG
  fprintf(stderr, "Penryn\n");
#endif

  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif

#ifdef DUNNINGTON

#ifdef DEBUG
  fprintf(stderr, "Dunnington\n");
#endif

  /* Same formulas as Penryn. */
  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif


  /* From here on (except Opteron) the cores use the *_DEFAULT_P constants
     directly instead of scaling with the detected L2 size. */

#ifdef NEHALEM

#ifdef DEBUG
  fprintf(stderr, "Nehalem\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef SANDYBRIDGE

#ifdef DEBUG
  fprintf(stderr, "Sandybridge\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef HASWELL

#ifdef DEBUG
  fprintf(stderr, "Haswell\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef OPTERON

#ifdef DEBUG
  fprintf(stderr, "Opteron\n");
#endif

  /* Constant base plus an L2-proportional term. */
  TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
#endif
#endif

#ifdef BARCELONA

#ifdef DEBUG
  fprintf(stderr, "Barcelona\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BOBCAT

#ifdef DEBUG
  fprintf(stderr, "Bobcat\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BULLDOZER

#ifdef DEBUG
  fprintf(stderr, "Bulldozer\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef PILEDRIVER

#ifdef DEBUG
  fprintf(stderr, "Piledriver\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef STEAMROLLER

#ifdef DEBUG
  fprintf(stderr, "Steamroller\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef NANO

#ifdef DEBUG
  fprintf(stderr, "NANO\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;

#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif

#endif


  /* ---- P for the 3M variants ------------------------------------------ */
  /* These copy the P chosen above *before* it is rounded; each 3M entry is
     rounded separately below with its own UNROLL_M. */
#ifdef CGEMM3M_DEFAULT_P
  TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
  TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif

#ifdef ZGEMM3M_DEFAULT_P
  TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
  TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif

#ifdef EXPRECISION
  TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif


  /* ---- Round every P up to its UNROLL_M -------------------------------- */
  TABLE_NAME.sgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.sgemm_p, SGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.dgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.dgemm_p, DGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.cgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm_p, CGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.zgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm_p, ZGEMM_DEFAULT_UNROLL_M);

#ifdef CGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.cgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm3m_p, CGEMM3M_DEFAULT_UNROLL_M);
#else
  TABLE_NAME.cgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.cgemm3m_p, SGEMM_DEFAULT_UNROLL_M);
#endif

#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.zgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm3m_p, ZGEMM3M_DEFAULT_UNROLL_M);
#else
  TABLE_NAME.zgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.zgemm3m_p, DGEMM_DEFAULT_UNROLL_M);
#endif

  /* Guarded by EXPRECISION, like every other q/x assignment in this
     function.  The previous QUAD_PRECISION guard meant these values were
     never rounded in an EXPRECISION build. */
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.qgemm_p, QGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.xgemm_p = INIT_PARAM_ROUND_UP(TABLE_NAME.xgemm_p, XGEMM_DEFAULT_UNROLL_M);
  TABLE_NAME.xgemm3m_p = INIT_PARAM_ROUND_UP(TABLE_NAME.xgemm3m_p, QGEMM_DEFAULT_UNROLL_M);
#endif

#ifdef DEBUG
  fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif

  /* ---- R: derived from BUFFER_SIZE, P and Q ---------------------------- */
  /* The last argument is the per-element byte count used by the original
     hand-written formulas (4, 8, 16 or 32). */
  TABLE_NAME.sgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.sgemm_p, TABLE_NAME.sgemm_q,  4);

  TABLE_NAME.dgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.dgemm_p, TABLE_NAME.dgemm_q,  8);

#ifdef EXPRECISION
  TABLE_NAME.qgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.qgemm_p, TABLE_NAME.qgemm_q, 16);
#endif

  TABLE_NAME.cgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.cgemm_p, TABLE_NAME.cgemm_q,  8);

  TABLE_NAME.zgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.zgemm_p, TABLE_NAME.zgemm_q, 16);

  TABLE_NAME.cgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.cgemm3m_p, TABLE_NAME.cgemm3m_q,  8);

  TABLE_NAME.zgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.zgemm3m_p, TABLE_NAME.zgemm3m_q, 16);

#ifdef EXPRECISION
  TABLE_NAME.xgemm_r = INIT_PARAM_GEMM_R(TABLE_NAME.xgemm_p, TABLE_NAME.xgemm_q, 32);

  TABLE_NAME.xgemm3m_r = INIT_PARAM_GEMM_R(TABLE_NAME.xgemm3m_p, TABLE_NAME.xgemm3m_q, 32);
#endif

}

#undef INIT_PARAM_GEMM_R
#undef INIT_PARAM_ROUND_UP
