(*
 * Copyright (c) 2001 Stefan Kral
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *)

(* The functions in this module model some aspects of the execution behaviour
 * of the P4 processor. The instruction scheduler uses these functions for
 * selecting instructions that do not require the same execution resources.  *)

open P4Basics
open NonDetMonad

(* Do not care about address generation. (Basically assume that enough 
 * address-generation units (AGUs) are available at any time.) *)

(* Execution resources tracked by this model.  The first four constructors
 * are the issue ports; the remaining ones are named after the port they
 * belong to (P4E_P0_* / P4E_P1_*).
 * NOTE(review): the meaning of the individual P4E_P0_* / P4E_P1_* units is
 * taken from their names only -- confirm against the P4 documentation. *)
type p4executionresource = 		(* P4 EXECUTION RESOURCE ************)
  | P4E_Port0				(*   ALU and FPMove instructions    *)
  | P4E_Port1 				(*   ALU, Integer and FP instrs     *)
  | P4E_Port2				(*   All Loads, LEA, Prefetch 	    *)
  | P4E_Port3				(*   Store			    *)
  (* units named after Port0 *)
  | P4E_P0_ALU
  | P4E_P0_FPMove
  | P4E_P0_FPStore  (* NOTE(review): not listed in p4ResourcesPerCycle and
                     * not consumed by any rule in this file *)
  (* units named after Port1 *)
  | P4E_P1_ALU
  | P4E_P1_IEU
  | P4E_P1_FPAdd
  | P4E_P1_FPMul
  | P4E_P1_FPDiv
  | P4E_P1_FPMisc
  | P4E_P1_MMXShift
  | P4E_P1_MMXALU
  | P4E_P1_MMXMisc


(* Per cycle the following resources are available.  The list is a
 * multiset: a resource that occurs twice can be consumed twice.  The four
 * issue ports and the ALU/FPMove/IEU units occur twice, all other listed
 * units once. *)
let p4ResourcesPerCycle =
  let issuePorts = [P4E_Port0; P4E_Port1; P4E_Port2; P4E_Port3] in
  let port0Units = [P4E_P0_ALU; P4E_P0_FPMove] in
  let port1IntUnits = [P4E_P1_ALU; P4E_P1_IEU] in
  let port1OtherUnits =
    [P4E_P1_FPAdd; P4E_P1_FPMul; P4E_P1_FPDiv; P4E_P1_FPMisc;
     P4E_P1_MMXShift; P4E_P1_MMXALU; P4E_P1_MMXMisc] in
  List.concat
    [issuePorts; issuePorts;
     port0Units; port0Units;
     port1IntUnits; port1IntUnits;
     port1OtherUnits]


(* Resources that are available at a given time.  The monad state is used
 * as the list of still-unconsumed resources, so reading and writing that
 * list are plain aliases of the state accessors fetchStateM / storeStateM
 * (not defined in this file; presumably provided by NonDetMonad). *)
let fetchResourcesM = fetchStateM
let storeResourcesM = storeStateM

(* Action for instructions that need no execution resource: the monadic
 * unit, which does not touch the resource list. *)
let consumeNoneM = unitM ()
(* Consumes one occurrence of resource [r]: reads the current resource
 * list, lets deleteFirstM drop the first element equal to [r], and stores
 * the remaining list back.
 * NOTE(review): presumably the computation yields no result when [r] is
 * not available -- confirm against deleteFirstM. *)
let consumeOneM r =
  let isRequested candidate = r = candidate in
  (fetchResourcesM >>= deleteFirstM isRequested) >>= storeResourcesM
(* Consumes one resource chosen from the alternatives [rs]: memberM picks
 * an element of [rs] and that element is handed to consumeOneM. *)
let consumeOneOfM rs =
  memberM rs >>= fun chosen -> consumeOneM chosen
(* Consumes every resource in the list [rs] by running consumeOneM on each
 * element through iterM. *)
let consumeManyM rs = iterM consumeOneM rs
(* Consumes everything: replaces the resource list by the empty list, so
 * no further resource can be taken afterwards. *)
let consumeAllM = storeResourcesM []

(***************************************************************************)

(* A memory load occupies the load port.  p4executionresource documents
 * P4E_Port2 as the port for all loads and P4E_Port3 as the store port
 * (which is what P4R_IntStoreMem consumes), so loads must take Port2;
 * taking Port3 would make loads compete with stores for the store port
 * while leaving the load port unused. *)
let loadConsumesM  = consumeOneM P4E_Port2

(* An integer operation takes either Port0 together with the Port0 ALU or
 * Port1 together with the Port1 ALU; memberM chooses between the two
 * alternatives and the chosen pair is consumed as a whole. *)
let intConsumesM =
  let viaPort0 = [P4E_Port0; P4E_P0_ALU] in
  let viaPort1 = [P4E_Port1; P4E_P1_ALU] in
  memberM [viaPort0; viaPort1] >>= consumeManyM

(* Floating-point multiply: Port1 plus the FPMul unit. *)
let fpmulConsumesM =
  let needed = [P4E_Port1; P4E_P1_FPMul] in
  consumeManyM needed

(* Floating-point add: Port1 plus the FPAdd unit. *)
let fpaddConsumesM =
  let needed = [P4E_Port1; P4E_P1_FPAdd] in
  consumeManyM needed

(* Shuffle-type operations: Port1 plus the MMXShift unit. *)
let shuffleConsumesM =
  let needed = [P4E_Port1; P4E_P1_MMXShift] in
  consumeManyM needed

(*****************************************************************************)

(* Resources of a SIMD unary operation.  Every operation handled here
 * (sign change and the two multiply-by-constant forms) is charged like a
 * floating-point multiply. *)
let p4simdunaryopConsumesM op =
  match op with
  | P4_FPChs _ -> fpmulConsumesM
  | P4_FPMulC1 _ -> fpmulConsumesM
  | P4_FPMulC2 _ -> fpmulConsumesM

(* Resources of a SIMD binary operation: multiplies are charged as
 * fpmulConsumesM, adds and subtracts as fpaddConsumesM, and
 * shuffle/unpack operations as shuffleConsumesM. *)
let p4simdbinopConsumesM op =
  match op with
  | P4_FPMul1 | P4_FPMul2 -> fpmulConsumesM
  | P4_FPAdd1 | P4_FPSub1 | P4_FPAdd2 | P4_FPSub2 -> fpaddConsumesM
  | P4_UnpckLo | P4_UnpckHi | P4_Shuffle _ -> shuffleConsumesM

(* Resources of a copying SIMD unary operation.  The only operation handled
 * here, P4_FPId, takes Port0 plus the Port0 FPMove unit. *)
let p4simdcpyunaryopConsumesM op =
  match op with
  | P4_FPId ->
      let needed = [P4E_Port0; P4E_P0_FPMove] in
      consumeManyM needed

(* Resources of a copying integer unary operation: a plain copy is an
 * ordinary integer operation, whereas a multiply by an immediate empties
 * the resource list (consumeAllM). *)
let p4intcpyunaryopConsumesM op =
  match op with
  | P4_ICopy -> intConsumesM
  | P4_IMulImm _ -> consumeAllM

(* Maps a P4 instruction to the monadic action that consumes the execution
 * resources it needs.  Forms with a memory operand (the *Mem constructors)
 * run the action of the operation itself and then loadConsumesM. *)
let p4rinstrConsumesM instr =
  match instr with
  (* barrier, cell-size promise and return: nothing is consumed *)
  | P4R_SimdLoadStoreBarrier
  | P4R_SimdPromiseCellSize _
  | P4R_Ret ->
      consumeNoneM
  (* branches, jumps, labels and register-only integer operations *)
  | P4R_CondBranch _
  | P4R_Jump _
  | P4R_Label _
  | P4R_IntBinOp _
  | P4R_IntUnaryOp _ ->
      intConsumesM
  | P4R_IntCpyUnaryOp (op, _, _) ->
      p4intcpyunaryopConsumesM op
  (* integer instructions that read memory *)
  | P4R_IntBinOpMem _
  | P4R_IntUnaryOpMem _
  | P4R_IntLoadMem _ ->
      intConsumesM >> loadConsumesM
  (* address computation and integer store each take a single port *)
  | P4R_IntLoadEA _ ->
      consumeOneM P4E_Port2
  | P4R_IntStoreMem _ ->
      consumeOneM P4E_Port3
  (* SIMD arithmetic: delegated to the per-operation tables *)
  | P4R_SimdUnaryOp (op, _) ->
      p4simdunaryopConsumesM op
  | P4R_SimdBinOp (op, _, _) ->
      p4simdbinopConsumesM op
  | P4R_SimdBinOpMem (op, _, _) ->
      p4simdbinopConsumesM op >> loadConsumesM
  | P4R_SimdCpyUnaryOp (op, _, _) ->
      p4simdcpyunaryopConsumesM op
  | P4R_SimdCpyUnaryOpMem (op, _, _) ->
      p4simdcpyunaryopConsumesM op >> loadConsumesM
  (* SIMD loads *)
  | P4R_SimdLoad _
  | P4R_SimdLoad1 _ ->
      loadConsumesM
  (* SIMD stores and spills: Port0 with its FPMove unit, plus Port3 *)
  | P4R_SimdStore _
  | P4R_SimdStore1 _
  | P4R_SimdSpill _ ->
      consumeManyM [P4E_Port0; P4E_P0_FPMove; P4E_Port3]

(* EXPORTED FUNCTIONS *******************************************************)

(* Upper bound on the number of instructions that
 * p4rinstrsCanIssueInOneCycle accepts in one group.
 * NOTE(review): the identifier says "per cycle" whereas the remark on the
 * definition says "per two-cycles" -- confirm which one is intended. *)
let p4SlotsPerCycle = 4		      (* number of slots per two-cycles *)

(* Checks whether the given instructions can be issued in the same cycle.
 * The group is rejected outright when it has more than p4SlotsPerCycle
 * instructions; otherwise the resource actions of all instructions are run
 * in sequence, starting from p4ResourcesPerCycle, and the verdict of runP
 * is returned. *)
let p4rinstrsCanIssueInOneCycle instrs =
  if List.length instrs > p4SlotsPerCycle then false
  else runP (iterM p4rinstrConsumesM) instrs p4ResourcesPerCycle
