/*
	Copyright (c) 2002, Thomas Kurschel
 
 
	Part of Radeon accelerant
 
	CP initialization/sync/cleanup.
 
	It also handles command buffer synchronization.
 
	non-local memory is used as following:
	- 2048 dwords for ring buffer
	- 253 indirect buffers of 4 KB each (1024 dwords)
	- 8 dwords for returned data (i.e. current read ptr)
	  & 6 dwords for "scratch registers"
 
	usage of scratch registers:
	- reg 0 = reached engine.count
 
	with a granularity of 4 KByte, we need 2+253+1=256 blocks, which is exactly 1 MB
*/
 
#include "radeon_driver.h"
#include "CPMicroCode.h"
#include "mmio.h"
#include "cp_regs.h"
#include "pll_regs.h"
#include "rbbm_regs.h"
#include "buscntrl_regs.h"
#include "config_regs.h"
#include "memcntrl_regs.h"
#include "utils.h"
#include "pll_access.h"
 
#include "log_coll.h"
#include "log_enum.h"
 
#include <string.h>
 
#if 0
 
// macros for user-space
 
// allocate memory via the kernel driver (user-space version);
// asize is in dwords (converted to bytes below);
// NOTE: the requested mem_type is currently ignored - mt_nonlocal is always used;
// expects 'res' and 'ai' to be defined in the calling scope
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	{ \
		radeon_alloc_mem am; \
\
		am.magic = RADEON_PRIVATE_DATA_MAGIC; \
		am.size = (asize) * 4; \
		am.memory_type = (mt_nonlocal); \
		am.global = (aglobal); \
\
		res = ioctl( ai->fd, RADEON_ALLOC_MEM, &am ); \
		/* both handle and offset must only be written on success; \
		   the original code was missing the braces, so *(offset) was \
		   assigned unconditionally */ \
		if( res == B_OK ) { \
			*(handle) = am.handle; \
			*(offset) = am.offset; \
		} \
	}
 
// get CPU-visible address of allocated memory (user-space version);
// resolves (memory type, offset) inside the accelerant's mapping of that pool
#define MEM2CPU( mem ) \
	((uint32 *)(ai->mapped_memory[(mem).memory_type].data + (mem).offset))
 
#define MEM2GC( mem ) ((mem).offset + si->memory[(mem).memory_type].virtual_addr_start)
 
// free memory via the kernel driver (user-space version);
// expects 'ai' to be defined in the calling scope
#define FREE_MEM( mem_type, handle ) \
	{ \
		radeon_free_mem fm; \
\
		fm.magic = RADEON_PRIVATE_DATA_MAGIC; \
		fm.memory_type = mem_type; \
		/* was "fm.handle = offset;" - 'offset' is not a parameter of this \
		   macro and would silently bind to whatever variable happened to \
		   be in scope at the call site */ \
		fm.handle = (handle); \
\
		ioctl( ai->fd, RADEON_FREE_MEM, &fm ); \
	}
 
#else
 
// macros for kernel-space
 
// allocate memory
// if memory_type is non-local, it is replaced with default non-local type;
// sets 'res' (status_t) in the calling scope.
// wrapped in do/while(0) so the macro acts as a single statement - the
// previous unguarded if + assignment pair broke inside if/else chains
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	do { \
		if( mem_type == mt_nonlocal ) \
			mem_type = di->si->nonlocal_type; \
		res = mem_alloc( di->memmgr[mem_type], asize, NULL, handle, offset ); \
	} while( 0 )
 
// get address as seen by program to access allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM);
// local memory is addressed via the frame-buffer mapping, PCI and AGP
// memory via the respective GART buffer
#define MEM2CPU( memory_type, offset ) \
	((uint8 *)(memory_type == mt_local ? di->si->local_mem : \
	(memory_type == mt_PCI ? di->pci_gart.buffer.ptr : di->agp_gart.buffer.ptr)) \
	+ (offset))
 
// get graphics card's virtual address of allocated memory
// (memory_type must _not_ be non-local, see ALLOC_MEM)
#define MEM2GC( memory_type, offset ) \
	(di->si->memory[(memory_type)].virtual_addr_start + (offset))
 
// free memory
// if memory_type is non-local, it is replaced with default non-local type.
// wrapped in do/while(0) so the call-site semicolon does not leave an
// extra empty statement (which breaks if/else chains)
#define FREE_MEM( mem_type, handle ) \
	do { \
		mem_free( \
			di->memmgr[ mem_type == mt_nonlocal ? di->si->nonlocal_type : mem_type], \
			handle, NULL ); \
	} while( 0 )
 
#endif
 
 
void Radeon_DiscardAllIndirectBuffers( device_info *di );
 
#define RADEON_SCRATCH_REG_OFFSET	32
 
 
void Radeon_FlushPixelCache( device_info *di );
 
// wait until engine is idle;
// acquire_lock - 	true, if lock must be hold
//					false, if lock is already acquired
// keep_lock -		true, keep lock on exit (only valid if acquire_lock is true)
void Radeon_WaitForIdle( device_info *di, bool acquire_lock, bool keep_lock )
{
	if( acquire_lock )
		ACQUIRE_BEN( di->si->cp.lock );

	// wait for 64 free FIFO entries first (presumably the full FIFO
	// depth, i.e. the command FIFO is completely drained - TODO confirm)
	Radeon_WaitForFifo( di, 64 );

	while( 1 ) {
		bigtime_t start_time = system_time();

		// poll the engine-active bit for up to one second
		do {
			if( (INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_ACTIVE) == 0 ) {
				// engine reports idle; flush the 2D pixel cache so
				// rendered data actually reaches memory
				Radeon_FlushPixelCache( di );

				if( acquire_lock && !keep_lock)
					RELEASE_BEN( di->si->cp.lock );

				return;
			}

			snooze( 1 );
		} while( system_time() - start_time < 1000000 );

		// engine got stuck: log the relevant status registers,
		// soft-reset it and start polling again (never gives up)
		SHOW_ERROR( 3, "Engine didn't become idle (rbbm_status=%lx, cp_stat=%lx, tlb_address=%lx, tlb_data=%lx)",
			INREG( di->regs, RADEON_RBBM_STATUS ),
			INREG( di->regs, RADEON_CP_STAT ),
			INREG( di->regs, RADEON_AIC_TLB_ADDR ),
			INREG( di->regs, RADEON_AIC_TLB_DATA ));

		LOG( di->si->log, _Radeon_WaitForIdle );

		Radeon_ResetEngine( di );
	}
}
 
 
// wait until "entries" FIFO entries are empty
// lock must be hold
void Radeon_WaitForFifo( device_info *di, int entries )
{
	for( ;; ) {
		bigtime_t timeout = system_time() + 1000000;

		// poll the free-slot count for up to one second
		while( system_time() < timeout ) {
			int free_slots =
				INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_FIFOCNT_MASK;

			if( free_slots >= entries )
				return;

			snooze( 1 );
		}

		// FIFO didn't drain in time - reset the engine and retry
		LOG( di->si->log, _Radeon_WaitForFifo );

		Radeon_ResetEngine( di );
	}
}
 
// flush pixel cache of graphics card
void Radeon_FlushPixelCache( device_info *di )
{
	bigtime_t timeout;

	// trigger flush of all 2D destination caches
	OUTREGP( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL,
		~RADEON_RB2D_DC_FLUSH_ALL );

	// wait up to one second for the cache to become idle
	timeout = system_time() + 1000000;

	do {
		uint32 status = INREG( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT );

		if( (status & RADEON_RB2D_DC_BUSY) == 0 )
			return;

		snooze( 1 );
	} while( system_time() < timeout );

	LOG( di->si->log, _Radeon_FlushPixelCache );

	SHOW_ERROR0( 0, "pixel cache didn't become empty" );
}
 
// reset graphics card's engine
// lock must be hold
//
// soft-resets CP, 2D/3D units and the host data path; any commands still
// in flight are lost, so all queued indirect buffers are discarded below
void Radeon_ResetEngine( device_info *di )
{
	vuint8 *regs = di->regs;
	shared_info *si = di->si;
	uint32 clock_cntl_index, mclk_cntl, rbbm_soft_reset, host_path_cntl;
	uint32 cur_read_ptr;

	SHOW_FLOW0( 3, "" );

	Radeon_FlushPixelCache( di );

	// save clock state that the reset sequence below modifies
	clock_cntl_index = INREG( regs, RADEON_CLOCK_CNTL_INDEX );
	RADEONPllErrataAfterIndex( regs, di->asic );	// drm has no errata here!
	mclk_cntl = Radeon_INPLL( regs, di->asic, RADEON_MCLK_CNTL );

	// enable clock of units to be reset
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl |
      RADEON_FORCEON_MCLKA |
      RADEON_FORCEON_MCLKB |
      RADEON_FORCEON_YCLKA |
      RADEON_FORCEON_YCLKB |
      RADEON_FORCEON_MC |
      RADEON_FORCEON_AIC );

	// do the reset: pulse the soft-reset bits of all engine units
	// (set, read back to flush, clear, read back again)
    host_path_cntl = INREG( regs, RADEON_HOST_PATH_CNTL );
	rbbm_soft_reset = INREG( regs, RADEON_RBBM_SOFT_RESET );

	OUTREG( regs, RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
			RADEON_SOFT_RESET_CP |
			RADEON_SOFT_RESET_HI |
			RADEON_SOFT_RESET_SE |
			RADEON_SOFT_RESET_RE |
			RADEON_SOFT_RESET_PP |
			RADEON_SOFT_RESET_E2 |
			RADEON_SOFT_RESET_RB ) );
	INREG( regs, RADEON_RBBM_SOFT_RESET);
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset &
		~( RADEON_SOFT_RESET_CP |
		   RADEON_SOFT_RESET_HI |
		   RADEON_SOFT_RESET_SE |
		   RADEON_SOFT_RESET_RE |
		   RADEON_SOFT_RESET_PP |
		   RADEON_SOFT_RESET_E2 |
		   RADEON_SOFT_RESET_RB ) );
	INREG( regs, RADEON_RBBM_SOFT_RESET);

	// pulse host data path soft-reset as well
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET );
    INREG( regs, RADEON_HOST_PATH_CNTL );
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl );

	// restore the saved clock state
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl );
   	OUTREG( regs, RADEON_CLOCK_CNTL_INDEX, clock_cntl_index );
   	//RADEONPllErrataAfterIndex( regs, di->asic ); // drm doesn't do this here!
   	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);

	if ( di->acc_dma )
	{
		// reset ring buffer: make write pointer equal to the current
		// read pointer so the CP sees an empty ring
		cur_read_ptr = INREG( regs, RADEON_CP_RB_RPTR );
		OUTREG( regs, RADEON_CP_RB_WPTR, cur_read_ptr );

		//if( si->cp.ring.head ) {
		// during init, there are no feedback data
		if( si->cp.feedback.mem_handle != 0 ) {
			*(uint32 *)MEM2CPU( si->cp.feedback.mem_type, si->cp.feedback.head_mem_offset) =
				cur_read_ptr;
			//	*si->cp.ring.head = cur_read_ptr;
			si->cp.ring.tail = cur_read_ptr;
		}

		// mark all buffers as being finished
		Radeon_DiscardAllIndirectBuffers( di );
	}

	// increment engine generation counter
	// (scratch register 0 reflects the reached engine.count, see file header)
	++si->engine.count;
	return;
}
 
 
// upload Micro-Code of CP
// selects the microcode image matching the ASIC family and writes all
// 256 (low, high) dword pairs into the CP micro engine RAM
static void loadMicroEngineRAMData( device_info *di )
{
	int i;
	// each entry is a (low dword, high dword) pair
	const uint32 (*microcode)[2];

	switch( di->asic ) {
	case rt_r300:
	case rt_rv350:
	case rt_r350:
	case rt_rv380:
	case rt_r420:
		microcode = r300_cp_microcode;
		break;
	case rt_r200:
		microcode = r200_cp_microcode;
		break;
	case rt_rs100:
	// all remaining ASICs use the original Radeon microcode
	default:
		microcode = radeon_cp_microcode;
	}

	SHOW_FLOW0( 3, "" );

	// engine must be idle before reprogramming the micro engine RAM
	Radeon_WaitForIdle( di, false, false );

	// set start address once, then stream the 256 entries
	// (RAM address presumably auto-increments per write pair - TODO confirm)
	OUTREG( di->regs, RADEON_CP_ME_RAM_ADDR, 0 );

	for ( i = 0 ; i < 256 ; i++ ) {
		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAH, microcode[i][1] );
		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAL, microcode[i][0] );
	}
}
 
// allocate and program the CP ring buffer
// aring_size - size of ring in dwords (must be a power of two, as the
//              hardware size field is log2 of qwords and tail_mask relies on it)
static status_t initRingBuffer( device_info *di, int aring_size )
{
	status_t res;
	shared_info *si = di->si;
	CP_info *cp = &si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;

	memset( &cp->ring, 0, sizeof( cp->ring ));

	// ring and indirect buffers can be either in AGP or PCI GART
	// (it seems that they cannot be in graphics memory, at least
	//  I had serious coherency problems when I tried that)
	memory_type = mt_nonlocal;

	// sets 'res'; on success fills mem_handle and offset
	ALLOC_MEM( aring_size * 4, memory_type, true,
		&cp->ring.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate ring buffer" );
		return res;
	}

	// setup CP buffer
	cp->ring.mem_type = memory_type;
	cp->ring.mem_offset = offset;
	cp->ring.vm_base = MEM2GC( memory_type, offset );
	cp->ring.size = aring_size;
	cp->ring.tail_mask = aring_size - 1;
	OUTREG( regs, RADEON_CP_RB_BASE, cp->ring.vm_base );
	SHOW_INFO( 3, "CP buffer address=%lx", cp->ring.vm_base );

	// set ring buffer size
	// (it's log2 of qwords)
	OUTREG( regs, RADEON_CP_RB_CNTL, radeon_log2( cp->ring.size / 2 ));
	SHOW_INFO( 3, "CP buffer size mask=%d", radeon_log2( cp->ring.size / 2 ) );

	// set write pointer delay to zero;
	// we assume that memory synchronization is done correctly my MoBo
	// and Radeon_SendCP contains a hack that hopefully fixes such problems
	OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 );

	// clear ring content (size is in dwords, memset takes bytes)
	memset( MEM2CPU( cp->ring.mem_type, cp->ring.mem_offset), 0, cp->ring.size * 4 );

	// set CP buffer pointers
	OUTREG( regs, RADEON_CP_RB_RPTR, 0 );
	OUTREG( regs, RADEON_CP_RB_WPTR, 0 );
	//*cp->ring.head = 0;
	cp->ring.tail = 0;

	return B_OK;
}
 
// tear down the CP ring buffer
static void uninitRingBuffer( device_info *di )
{
	// abort any activity
	Radeon_ResetEngine( di );

	// disable CP bus mastering (primary and indirect queues)
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	FREE_MEM( mt_nonlocal, di->si->cp.ring.mem_handle );
}
 
// allocate and program the CP feedback area: the CP read pointer
// write-back location plus the scratch registers (see file header)
static status_t initCPFeedback( device_info *di )
{
	CP_info *cp = &di->si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;
	status_t res;

	// status information should be in PCI memory, so CPU can
	// poll it without locking the bus (PCI memory is the only
	// cachable memory available)
	memory_type = mt_PCI;

	// one area holds both: read pointer at 'offset', scratch registers
	// at offset + RADEON_SCRATCH_REG_OFFSET (0x40 bytes total for scratch)
	ALLOC_MEM( RADEON_SCRATCH_REG_OFFSET + 0x40, memory_type, true,
		&cp->feedback.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate buffers for status information" );
		return res;
	}

	// setup CP read pointer buffer
	cp->feedback.mem_type = memory_type;
	cp->feedback.head_mem_offset = offset;
	cp->feedback.head_vm_address = MEM2GC( memory_type, cp->feedback.head_mem_offset );
	OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, cp->feedback.head_vm_address );
	SHOW_INFO( 3, "CP read pointer buffer==%lx", cp->feedback.head_vm_address );

	// setup scratch register buffer
	cp->feedback.scratch_mem_offset = offset + RADEON_SCRATCH_REG_OFFSET;
	cp->feedback.scratch_vm_start = MEM2GC( memory_type, cp->feedback.scratch_mem_offset );
	OUTREG( regs, RADEON_SCRATCH_ADDR, cp->feedback.scratch_vm_start );
	// 0x3f = allow write-back of all six scratch registers
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f );

	// clear read pointer and scratch area
	*(uint32 *)MEM2CPU( cp->feedback.mem_type, cp->feedback.head_mem_offset) = 0;
	memset( MEM2CPU( cp->feedback.mem_type, cp->feedback.scratch_mem_offset), 0, 0x40 );
	//*cp->ring.head = 0;

	return B_OK;
}
 
// tear down the CP feedback area
static void uninitCPFeedback( device_info *di )
{
	// don't allow any scratch buffer update before freeing its backing store
	OUTREG( di->regs, RADEON_SCRATCH_UMSK, 0x0 );

	FREE_MEM( mt_PCI, di->si->cp.feedback.mem_handle );
}
 
// allocate the pool of indirect (command) buffers and build the free list
static status_t initIndirectBuffers( device_info *di )
{
	CP_info *cp = &di->si->cp;
	int32 offset;
	memory_type_e memory_type;
	int i;
	status_t res;

	// like the ring buffer, indirect buffers must live in non-local memory
	memory_type = mt_nonlocal;

	ALLOC_MEM( NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4, memory_type,
		true, &cp->buffers.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate indirect buffers" );
		// propagate the real error code (was: generic B_ERROR);
		// consistent with initRingBuffer/initCPFeedback
		return res;
	}

	cp->buffers.mem_type = memory_type;
	cp->buffers.mem_offset = offset;
	cp->buffers.vm_start = MEM2GC( memory_type, cp->buffers.mem_offset );

	// chain all buffers into one list ...
	for( i = 0; i < NUM_INDIRECT_BUFFERS - 1; ++i ) {
		cp->buffers.buffers[i].next = i + 1;
	}

	// ... terminated by the last one
	cp->buffers.buffers[i].next = -1;

	// entire pool starts on the free list; no buffer submitted yet
	cp->buffers.free_list = 0;
	cp->buffers.oldest = -1;
	cp->buffers.newest = -1;
	cp->buffers.active_state = -1;
	cp->buffers.cur_tag = 0;

	memset( MEM2CPU( cp->buffers.mem_type, cp->buffers.mem_offset), 0,
		NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4 );

	return B_OK;
}
 
// free the indirect buffer pool
static void uninitIndirectBuffers( device_info *di )
{
	FREE_MEM( mt_nonlocal, di->si->cp.buffers.mem_handle );
}
 
// initialize CP so it's ready for BM
// returns B_OK on success; on failure everything allocated so far is
// released again via the goto-cleanup ladder at the bottom
status_t Radeon_InitCP( device_info *di )
{
	thread_id thid;
    thread_info thinfo;
	status_t res;

	SHOW_FLOW0( 3, "" );

	// this is _really_ necessary so functions like ResetEngine() know
	// that the CP is not set up yet
	memset( &di->si->cp, 0, sizeof( di->si->cp ));

	if( (res = INIT_BEN( di->si->cp.lock, "Radeon CP" )) < 0 )
		return res;

	// HACK: change owner of benaphore semaphore to team of calling thread;
	// reason: user code cannot acquire kernel semaphores, but the accelerant
	// is in user space; interestingly, it's enough to change the semaphore's
	// owner to _any_ non-system team (that's the only security check done by
	// the kernel)
	thid = find_thread( NULL );
    get_thread_info( thid, &thinfo );
    set_sem_owner( di->si->cp.lock.sem, thinfo.team );

	// init raw CP
	if ( di->acc_dma ) loadMicroEngineRAMData( di );

	// do soft-reset
	Radeon_ResetEngine( di );

	// after warm-reset, the CP may still be active and thus react to
	// register writes during initialization unpredictably, so we better
	// stop it first
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	// reset CP to make disabling active
	Radeon_ResetEngine( di );

	if ( di->acc_dma )
	{
		res = initRingBuffer( di, CP_RING_SIZE );
		if( res < 0 )
			goto err4;

		res = initCPFeedback( di );
		if( res < 0 )
			goto err3;

		res = initIndirectBuffers( di );
		if( res < 0 )
			goto err2;

		// tell CP to use BM
		Radeon_WaitForIdle( di, false, false );

		// enable direct and indirect CP bus mastering
		OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM );

		// allow bus mastering in general
		OUTREGP( di->regs, RADEON_BUS_CNTL, 0, ~RADEON_BUS_MASTER_DIS );
	}


	// don't allow mixing of 2D/3D/scratch/wait_until commands
	// (in fact, this doesn't seem to make any difference as we do a
	// manual sync in all these cases anyway)
	OUTREG( di->regs, RADEON_ISYNC_CNTL,
		RADEON_ISYNC_ANY2D_IDLE3D |
		RADEON_ISYNC_ANY3D_IDLE2D |
		RADEON_ISYNC_WAIT_IDLEGUI |
		RADEON_ISYNC_CPSCRATCH_IDLEGUI );

	SHOW_FLOW( 3, "bus_cntl=%lx", INREG( di->regs, RADEON_BUS_CNTL ));

	SHOW_FLOW0( 3, "Done" );

	return B_OK;

	// cleanup in reverse order of allocation; each label undoes one
	// step less than the label above it
//err:
//	uninitIndirectBuffers( ai );
err2:
	uninitCPFeedback( di );
err3:
	uninitRingBuffer( di );
err4:
	DELETE_BEN( di->si->cp.lock );
	return res;
}
 
 
// shutdown CP, freeing any memory
void Radeon_UninitCP( device_info *di )
{
	// abort any pending commands
	Radeon_ResetEngine( di );

	// disable CP bus mastering (primary and indirect queues)
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	if ( di->acc_dma ) {
		uninitRingBuffer( di );
		uninitCPFeedback( di );
		uninitIndirectBuffers( di );
	}

	DELETE_BEN( di->si->cp.lock );
}
 
 
// mark all indirect buffers as being free;
// this should only be called after a reset;
// lock must be hold
void Radeon_DiscardAllIndirectBuffers( device_info *di )
{
	CP_info *cp = &di->si->cp;

	// during init, there is no indirect buffer
	if( cp->buffers.mem_handle == 0 )
		return;

	// walk the "used" list from oldest to newest, moving every
	// buffer onto the free list
	while( cp->buffers.oldest != -1 ) {
		int cur = cp->buffers.oldest;
		indirect_buffer *buffer = &cp->buffers.buffers[cur];

		SHOW_FLOW( 0, "%d", cur );

		// unlink from "used" list
		cp->buffers.oldest = buffer->next;

		// list became empty - there is no newest buffer anymore
		if( cp->buffers.oldest == -1 )
			cp->buffers.newest = -1;

		// push onto free list
		buffer->next = cp->buffers.free_list;
		cp->buffers.free_list = cur;
	}
}
 
// lets hide this in here, as it's got lots of lovely register headers already...
// does it go here, or in the accelerant anyway?
// for now i'm assuming you turn on dynamic clocks, and they take care of themselves onwards...
// so doing it at driver init seems sensible after a valid detection of course...
//
// mode 0 - disable dynamic clock scaling (force all clocks on)
// mode 1 - enable dynamic clock scaling (ASIC-specific sequence)
// other  - no-op
void Radeon_SetDynamicClock( device_info *di, int mode)
{
    vuint8 *regs = di->regs;
    radeon_type asic = di->asic;
    uint32 tmp;

    switch(mode) {
	case 0: /* Turn everything OFF (ForceON to everything)*/
		// NOTE(review): static analysis (V547) reports 'di->num_crtc != 2'
		// as always false in this code base - verify whether '< 2' or a
		// single-CRTC flag was intended (same test appears below for mode 1)
		if ( di->num_crtc != 2 ) {
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP   | RADEON_SCLK_FORCE_HDP |
				RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP |
				RADEON_SCLK_FORCE_E2   | RADEON_SCLK_FORCE_SE  |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP |
				RADEON_SCLK_FORCE_RE   | RADEON_SCLK_FORCE_PB  |
				RADEON_SCLK_FORCE_TAM  | RADEON_SCLK_FORCE_TDM |
				RADEON_SCLK_FORCE_RB);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if (asic == rt_rv350) {
			/* for RV350/M10, no delays are required. */
			tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
			tmp |= (R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
			Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
				RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
				RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
				R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
				RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
				R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
				R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
				R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
			tmp |= RADEON_SCLK_MORE_FORCEON;
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
			tmp |= (RADEON_FORCEON_MCLKA |
				RADEON_FORCEON_MCLKB |
				RADEON_FORCEON_YCLKA |
				RADEON_FORCEON_YCLKB |
				RADEON_FORCEON_MC);
			Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb |
			R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
				R300_DVOCLK_ALWAYS_ONb            |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				R300_PIXCLK_DVO_ALWAYS_ONb        |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
				R300_PIXCLK_TRANS_ALWAYS_ONb      |
				R300_PIXCLK_TVO_ALWAYS_ONb        |
				R300_P2G2CLK_ALWAYS_ONb            |
				R300_P2G2CLK_DAC_ALWAYS_ONb           |
				R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
		}  else {
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2);
			tmp |= RADEON_SCLK_FORCE_SE;

			if ( di->num_crtc != 2 ) {
				tmp |= ( RADEON_SCLK_FORCE_RB    |
				RADEON_SCLK_FORCE_TDM   |
				RADEON_SCLK_FORCE_TAM   |
				RADEON_SCLK_FORCE_PB    |
				RADEON_SCLK_FORCE_RE    |
				RADEON_SCLK_FORCE_VIP   |
				RADEON_SCLK_FORCE_IDCT  |
				RADEON_SCLK_FORCE_TOP   |
				RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_DISP2 |
				RADEON_SCLK_FORCE_HDP    );
			} else if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp |= ( RADEON_SCLK_FORCE_HDP   |
					RADEON_SCLK_FORCE_DISP1 |
					RADEON_SCLK_FORCE_DISP2 |
					RADEON_SCLK_FORCE_TOP   |
					RADEON_SCLK_FORCE_IDCT  |
					RADEON_SCLK_FORCE_VIP);
			}
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			snooze(16000);

			if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp |= ( R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
				snooze(16000);
			}

			if (di->is_igp) {
				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp &= ~(RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_YCLKA);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
				snooze(16000);
			}

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp |= RADEON_SCLK_MORE_FORCEON;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(16000);
			}

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(16000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Disabled" );
		break;
	case 1:
		if ( di->num_crtc != 2 ) {
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			// only later revisions may dynamically gate CP/RB clocks
			if ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > RADEON_CFG_ATI_REV_A13) {
				tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB);
			}
			tmp &= ~(RADEON_SCLK_FORCE_HDP  | RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_TOP  | RADEON_SCLK_FORCE_SE   |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE   |
				RADEON_SCLK_FORCE_PB   | RADEON_SCLK_FORCE_TAM  |
				RADEON_SCLK_FORCE_TDM);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if ((asic == rt_r300)
				|| (asic == rt_r350)
				|| (asic == rt_rv350)) {
			if (asic == rt_rv350) {
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				tmp |=  (R300_SCLK_TCL_MAX_DYN_STOP_LAT |
					R300_SCLK_GA_MAX_DYN_STOP_LAT  |
					R300_SCLK_CBA_MAX_DYN_STOP_LAT);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
					RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
					RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
					R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
					RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
					R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
					R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
					R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
					tmp |=  RADEON_DYN_STOP_LAT_MASK;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;
				tmp |=  RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
				tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
					RADEON_PIXCLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
				tmp |= (RADEON_PIX2CLK_ALWAYS_ONb         |
					RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
					RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
					R300_DVOCLK_ALWAYS_ONb            |
					RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
					RADEON_PIXCLK_GV_ALWAYS_ONb       |
					R300_PIXCLK_DVO_ALWAYS_ONb        |
					RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
					RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
					R300_PIXCLK_TRANS_ALWAYS_ONb      |
					R300_PIXCLK_TVO_ALWAYS_ONb        |
					R300_P2G2CLK_ALWAYS_ONb           |
					R300_P2G2CLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_MISC);
				tmp |= (RADEON_MC_MCLK_DYN_ENABLE |
					RADEON_IO_MCLK_DYN_ENABLE);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_MISC, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp |= (RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_MCLKB);

				tmp &= ~(RADEON_FORCEON_YCLKA  |
					RADEON_FORCEON_YCLKB  |
					RADEON_FORCEON_MC);

				/* Some releases of vbios have set DISABLE_MC_MCLKA
				and DISABLE_MC_MCLKB bits in the vbios table.  Setting these
				bits will cause H/W hang when reading video memory with dynamic clocking
				enabled. */
				if ((tmp & R300_DISABLE_MC_MCLKA) &&
				(tmp & R300_DISABLE_MC_MCLKB)) {
					/* If both bits are set, then check the active channels */
					tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
					if (di->ram.width == 64) {
						if (INREG( regs, RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY)
						tmp &= ~R300_DISABLE_MC_MCLKB;
						else
						tmp &= ~R300_DISABLE_MC_MCLKA;
					} else {
						tmp &= ~(R300_DISABLE_MC_MCLKA |
						R300_DISABLE_MC_MCLKB);
					}
				}

				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
			} else {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(R300_SCLK_FORCE_VAP);
				tmp |= RADEON_SCLK_FORCE_CP;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
				snooze(15000);

				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
			}
		} else {
			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL);

			tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK     |
				RADEON_DISP_DYN_STOP_LAT_MASK   |
				RADEON_DYN_STOP_MODE_MASK);

			tmp |= (RADEON_ENGIN_DYNCLK_MODE |
			(0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT));
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PIN_CNTL);
			tmp |= RADEON_SCLK_DYN_START_CNTL;
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PIN_CNTL, tmp);
			snooze(15000);

			/* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200
			to lockup randomly, leave them as set by BIOS.
			*/
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			/*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/
			tmp &= ~RADEON_SCLK_FORCEON_MASK;

			/*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/
			if (((asic == rt_rv250) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13)) ||
				((asic == rt_rv100) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <=
				  RADEON_CFG_ATI_REV_A13)))
			{
				tmp |= RADEON_SCLK_FORCE_CP;
				tmp |= RADEON_SCLK_FORCE_VIP;
			}

			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;

				/* RV200::A11 A12 RV250::A11 A12 */
				if (((asic == rt_rv200) ||
					 (asic == rt_rv250)) &&
					((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
					  RADEON_CFG_ATI_REV_A13))
				{
					tmp |= RADEON_SCLK_MORE_FORCEON;
				}
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(15000);
			}

			/* RV200::A11 A12, RV250::A11 A12 */
			if (((asic == rt_rv200) ||
				 (asic == rt_rv250)) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13))
			{
				tmp = Radeon_INPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL);
				tmp |= RADEON_TCL_BYPASS_DISABLE;
				Radeon_OUTPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL, tmp);
			}
			snooze(15000);

			/*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/
			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp |=  (RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp |= (RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
			snooze(15000);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Enabled" );
		break;
	default:
		break;
	}
}

/* Static-analysis findings (PVS-Studio), kept for reference - these
   messages were pasted after the closing brace and would not compile
   as plain text:

   V547 Expression 'di->num_crtc != 2' is always false.

   V576 Incorrect format. Consider checking the fourth actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the fifth actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the sixth actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the seventh actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the fourth actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the fourth actual argument of the 'dprintf' function. The memsize type argument is expected.

   V576 Incorrect format. Consider checking the fourth actual argument of the 'dprintf' function. The memsize type argument is expected.
*/