/*
 * Copyright 2004-2012, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */
 
 
#include <block_cache.h>
 
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
 
#include <KernelExport.h>
#include <fs_cache.h>
 
#include <condition_variable.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <tracing.h>
#include <util/kernel_cpp.h>
#include <util/DoublyLinkedList.h>
#include <util/AutoLock.h>
#include <vm/vm_page.h>
 
#include "kernel_debug_config.h"
 
 
// TODO: this is a naive but growing implementation to test the API:
//	block reading/writing is not at all optimized for speed, it will
//	just read and write single blocks.
// TODO: the retrieval/copy of the original data could be delayed until the
//		new data must be written, ie. in low memory situations.
 
//#define TRACE_BLOCK_CACHE
#ifdef TRACE_BLOCK_CACHE
#	define TRACE(x)	dprintf x
#else
#	define TRACE(x) ;
#endif
 
#define TRACE_ALWAYS(x) dprintf x
 
// This macro is used for fatal situations that are acceptable in a running
// system, like out of memory situations - should only panic for debugging.
#define FATAL(x) panic x
 
static const bigtime_t kTransactionIdleTime = 2000000LL;
	// a transaction is considered idle after 2 seconds of inactivity
 
 
namespace {
 
struct cache_transaction;
struct cached_block;
struct block_cache;
typedef DoublyLinkedListLink<cached_block> block_link;
 
struct cached_block {
	cached_block*	next;			// next in hash
	cached_block*	transaction_next;
	block_link		link;
	off_t			block_number;
	void*			current_data;
		// The data that is seen by everyone using the API; this one is always
		// present.
	void*			original_data;
		// When in a transaction, this contains the original data from before
		// the transaction.
	void*			parent_data;
		// This is a lazily alloced buffer that represents the contents of the
		// block in the parent transaction. It may point to current_data if the
		// contents have been changed only in the parent transaction, or, if the
		// block has been changed in the current sub transaction already, to a
		// new block containing the contents changed in the parent transaction.
		// If this is NULL, the block has not been changed in the parent
		// transaction at all.
#if BLOCK_CACHE_DEBUG_CHANGED
	void*			compare;
#endif
	int32			ref_count;
	int32			last_accessed;
	bool			busy_reading : 1;
	bool			busy_writing : 1;
	bool			is_writing : 1;
		// Block has been checked out for writing without transactions, and
		// cannot be written back if set
	bool			is_dirty : 1;
	bool			unused : 1;
	bool			discard : 1;
	bool			busy_reading_waiters : 1;
	bool			busy_writing_waiters : 1;
	cache_transaction* transaction;
		// This is the current active transaction, if any, the block is
		// currently in (meaning was changed as a part of it).
	cache_transaction* previous_transaction;
		// This is set to the last transaction that was ended containing this
		// block. In this case, the block has not yet written back yet, and
		// the changed data is either in current_data, or original_data -- the
		// latter if the block is already being part of another transaction.
		// There can only be one previous transaction, so when the active
		// transaction ends, the changes of the previous transaction have to
		// be written back before that transaction becomes the next previous
		// transaction.
 
	bool CanBeWritten() const;
	int32 LastAccess() const
		{ return system_time() / 1000000L - last_accessed; }
};
 
typedef DoublyLinkedList<cached_block,
	DoublyLinkedListMemberGetLink<cached_block,
		&cached_block::link> > block_list;
 
struct cache_notification : DoublyLinkedListLinkImpl<cache_notification> {
	int32			transaction_id;
	int32			events_pending;
	int32			events;
	transaction_notification_hook hook;
	void*			data;
	bool			delete_after_event;
};
 
typedef DoublyLinkedList<cache_notification> NotificationList;
 
struct BlockHash {
	typedef off_t			KeyType;
	typedef	cached_block	ValueType;
 
	size_t HashKey(KeyType key) const
	{
		return key;
	}
 
	size_t Hash(ValueType* block) const
	{
		return block->block_number;
	}
 
	bool Compare(KeyType key, ValueType* block) const
	{
		return block->block_number == key;
	}
 
	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};
 
typedef BOpenHashTable<BlockHash> BlockTable;
 
 
struct TransactionHash {
	typedef int32				KeyType;
	typedef	cache_transaction	ValueType;
 
	size_t HashKey(KeyType key) const
	{
		return key;
	}
 
	size_t Hash(ValueType* transaction) const;
	bool Compare(KeyType key, ValueType* transaction) const;
	ValueType*& GetLink(ValueType* value) const;
};
 
typedef BOpenHashTable<TransactionHash> TransactionTable;
 
 
struct block_cache : DoublyLinkedListLinkImpl<block_cache> {
	BlockTable*		hash;
	mutex			lock;
	int				fd;
	off_t			max_blocks;
	size_t			block_size;
	int32			next_transaction_id;
	cache_transaction* last_transaction;
	TransactionTable* transaction_hash;
 
	object_cache*	buffer_cache;
	block_list		unused_blocks;
	uint32			unused_block_count;
 
	ConditionVariable busy_reading_condition;
	uint32			busy_reading_count;
	bool			busy_reading_waiters;
 
	ConditionVariable busy_writing_condition;
	uint32			busy_writing_count;
	bool			busy_writing_waiters;
 
	uint32			num_dirty_blocks;
	bool			read_only;
 
	NotificationList pending_notifications;
	ConditionVariable condition_variable;
 
					block_cache(int fd, off_t numBlocks, size_t blockSize,
						bool readOnly);
					~block_cache();
 
	status_t		Init();
 
	void			Free(void* buffer);
	void*			Allocate();
	void			FreeBlock(cached_block* block);
	cached_block*	NewBlock(off_t blockNumber);
	void			FreeBlockParentData(cached_block* block);
 
	void			RemoveUnusedBlocks(int32 count, int32 minSecondsOld = 0);
	void			RemoveBlock(cached_block* block);
	void			DiscardBlock(cached_block* block);
 
private:
	static void		_LowMemoryHandler(void* data, uint32 resources,
						int32 level);
	cached_block*	_GetUnusedBlock();
};
 
struct cache_listener;
typedef DoublyLinkedListLink<cache_listener> listener_link;
 
struct cache_listener : cache_notification {
	listener_link	link;
};
 
typedef DoublyLinkedList<cache_listener,
	DoublyLinkedListMemberGetLink<cache_listener,
		&cache_listener::link> > ListenerList;
 
 
struct cache_transaction {
	cache_transaction();
 
	cache_transaction* next;
	int32			id;
	int32			num_blocks;
	int32			main_num_blocks;
	int32			sub_num_blocks;
	cached_block*	first_block;
	block_list		blocks;
	ListenerList	listeners;
	bool			open;
	bool			has_sub_transaction;
	bigtime_t		last_used;
	int32			busy_writing_count;
};
 
 
class BlockWriter {
public:
								BlockWriter(block_cache* cache,
									size_t max = SIZE_MAX);
								~BlockWriter();
 
			bool				Add(cached_block* block,
									cache_transaction* transaction = NULL);
			bool				Add(cache_transaction* transaction,
									bool& hasLeftOvers);
 
			status_t			Write(cache_transaction* transaction = NULL,
									bool canUnlock = true);
 
			bool				DeletedTransaction() const
									{ return fDeletedTransaction; }
 
	static	status_t			WriteBlock(block_cache* cache,
									cached_block* block);
 
private:
			void*				_Data(cached_block* block) const;
			status_t			_WriteBlock(cached_block* block);
			void				_BlockDone(cached_block* block,
									cache_transaction* transaction);
			void				_UnmarkWriting(cached_block* block);
 
	static	int					_CompareBlocks(const void* _blockA,
									const void* _blockB);
 
private:
	static	const size_t		kBufferSize = 64;
 
			block_cache*		fCache;
			cached_block*		fBuffer[kBufferSize];
			cached_block**		fBlocks;
			size_t				fCount;
			size_t				fTotal;
			size_t				fCapacity;
			size_t				fMax;
			status_t			fStatus;
			bool				fDeletedTransaction;
};
 
 
class TransactionLocking {
public:
	inline bool Lock(block_cache* cache)
	{
		mutex_lock(&cache->lock);
 
		while (cache->busy_writing_count != 0) {
			// wait for all blocks to be written
			ConditionVariableEntry entry;
			cache->busy_writing_condition.Add(&entry);
			cache->busy_writing_waiters = true;
 
			mutex_unlock(&cache->lock);
 
			entry.Wait();
 
			mutex_lock(&cache->lock);
		}
 
		return true;
	}
 
	inline void Unlock(block_cache* cache)
	{
		mutex_unlock(&cache->lock);
	}
};
 
typedef AutoLocker<block_cache, TransactionLocking> TransactionLocker;
 
} // namespace
 
 
#if BLOCK_CACHE_BLOCK_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
namespace BlockTracing {
 
class Action : public AbstractTraceEntry {
public:
	Action(block_cache* cache, cached_block* block)
		:
		fCache(cache),
		fBlockNumber(block->block_number),
		fIsDirty(block->is_dirty),
		fHasOriginal(block->original_data != NULL),
		fHasParent(block->parent_data != NULL),
		fTransactionID(-1),
		fPreviousID(-1)
	{
		if (block->transaction != NULL)
			fTransactionID = block->transaction->id;
		if (block->previous_transaction != NULL)
			fPreviousID = block->previous_transaction->id;
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, %s %" B_PRIu64 ", %c%c%c transaction %" B_PRId32
			" (previous id %" B_PRId32 ")\n", fCache, _Action(), fBlockNumber,
			fIsDirty ? 'd' : '-', fHasOriginal ? 'o' : '-',
			fHasParent ? 'p' : '-', fTransactionID, fPreviousID);
	}
 
	virtual const char* _Action() const = 0;
 
private:
	block_cache*		fCache;
	uint64				fBlockNumber;
	bool				fIsDirty;
	bool				fHasOriginal;
	bool				fHasParent;
	int32				fTransactionID;
	int32				fPreviousID;
};
 
class Get : public Action {
public:
	Get(block_cache* cache, cached_block* block)
		:
		Action(cache, block)
	{
		Initialized();
	}
 
	virtual const char* _Action() const { return "get"; }
};
 
class Put : public Action {
public:
	Put(block_cache* cache, cached_block* block)
		:
		Action(cache, block)
	{
		Initialized();
	}
 
	virtual const char* _Action() const { return "put"; }
};
 
class Read : public Action {
public:
	Read(block_cache* cache, cached_block* block)
		:
		Action(cache, block)
	{
		Initialized();
	}
 
	virtual const char* _Action() const { return "read"; }
};
 
class Write : public Action {
public:
	Write(block_cache* cache, cached_block* block)
		:
		Action(cache, block)
	{
		Initialized();
	}
 
	virtual const char* _Action() const { return "write"; }
};
 
class Flush : public Action {
public:
	Flush(block_cache* cache, cached_block* block, bool getUnused = false)
		:
		Action(cache, block),
		fGetUnused(getUnused)
	{
		Initialized();
	}
 
	virtual const char* _Action() const
		{ return fGetUnused ? "get-unused" : "flush"; }
 
private:
	bool	fGetUnused;
};
 
class Error : public AbstractTraceEntry {
public:
	Error(block_cache* cache, uint64 blockNumber, const char* message,
			status_t status = B_OK)
		:
		fCache(cache),
		fBlockNumber(blockNumber),
		fMessage(message),
		fStatus(status)
	{
		Initialized();
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, error %" B_PRIu64 ", %s%s%s",
			fCache, fBlockNumber, fMessage, fStatus != B_OK ? ": " : "",
			fStatus != B_OK ? strerror(fStatus) : "");
	}
 
private:
	block_cache*	fCache;
	uint64			fBlockNumber;
	const char*		fMessage;
	status_t		fStatus;
};
 
#if BLOCK_CACHE_BLOCK_TRACING >= 2
class BlockData : public AbstractTraceEntry {
public:
	enum {
		kCurrent	= 0x01,
		kParent		= 0x02,
		kOriginal	= 0x04
	};
 
	BlockData(block_cache* cache, cached_block* block, const char* message)
		:
		fCache(cache),
		fSize(cache->block_size),
		fBlockNumber(block->block_number),
		fMessage(message)
	{
		_Allocate(fCurrent, block->current_data);
		_Allocate(fParent, block->parent_data);
		_Allocate(fOriginal, block->original_data);
 
#if KTRACE_PRINTF_STACK_TRACE
		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
			false);
#endif
 
		Initialized();
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, block %" B_PRIu64 ", data %c%c%c: %s",
			fCache, fBlockNumber, fCurrent != NULL ? 'c' : '-',
			fParent != NULL ? 'p' : '-', fOriginal != NULL ? 'o' : '-',
			fMessage);
	}
 
#if KTRACE_PRINTF_STACK_TRACE
	virtual void DumpStackTrace(TraceOutput& out)
	{
		out.PrintStackTrace(fStackTrace);
	}
#endif
 
	void DumpBlocks(uint32 which, uint32 offset, uint32 size)
	{
		if ((which & kCurrent) != 0)
			DumpBlock(kCurrent, offset, size);
		if ((which & kParent) != 0)
			DumpBlock(kParent, offset, size);
		if ((which & kOriginal) != 0)
			DumpBlock(kOriginal, offset, size);
	}
 
	void DumpBlock(uint32 which, uint32 offset, uint32 size)
	{
		if (offset > fSize) {
			kprintf("invalid offset (block size %" B_PRIu32 ")\n", fSize);
			return;
		}
		if (offset + size > fSize)
			size = fSize - offset;
 
		const char* label;
		uint8* data;
 
		if ((which & kCurrent) != 0) {
			label = "current";
			data = fCurrent;
		} else if ((which & kParent) != 0) {
			label = "parent";
			data = fParent;
		} else if ((which & kOriginal) != 0) {
			label = "original";
			data = fOriginal;
		} else
			return;
 
		kprintf("%s: offset %" B_PRIu32 ", %" B_PRIu32 " bytes\n", label, offset, size);
 
		static const uint32 kBlockSize = 16;
		data += offset;
 
		for (uint32 i = 0; i < size;) {
			int start = i;
 
			kprintf("  %04" B_PRIx32 " ", i);
			for (; i < start + kBlockSize; i++) {
				if (!(i % 4))
					kprintf(" ");
 
				if (i >= size)
					kprintf("  ");
				else
					kprintf("%02x", data[i]);
			}
 
			kprintf("\n");
		}
	}
 
private:
	void _Allocate(uint8*& target, void* source)
	{
		if (source == NULL) {
			target = NULL;
			return;
		}
 
		target = alloc_tracing_buffer_memcpy(source, fSize, false);
	}
 
	block_cache*	fCache;
	uint32			fSize;
	uint64			fBlockNumber;
	const char*		fMessage;
	uint8*			fCurrent;
	uint8*			fParent;
	uint8*			fOriginal;
#if KTRACE_PRINTF_STACK_TRACE
	tracing_stack_trace* fStackTrace;
#endif
};
#endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
 
}	// namespace BlockTracing
 
#	define TB(x) new(std::nothrow) BlockTracing::x;
#else
#	define TB(x) ;
#endif
 
#if BLOCK_CACHE_BLOCK_TRACING >= 2
#	define TB2(x) new(std::nothrow) BlockTracing::x;
#else
#	define TB2(x) ;
#endif
 
 
#if BLOCK_CACHE_TRANSACTION_TRACING && !defined(BUILDING_USERLAND_FS_SERVER)
namespace TransactionTracing {
 
class Action : public AbstractTraceEntry {
public:
	Action(const char* label, block_cache* cache,
			cache_transaction* transaction)
		:
		fCache(cache),
		fTransaction(transaction),
		fID(transaction->id),
		fSub(transaction->has_sub_transaction),
		fNumBlocks(transaction->num_blocks),
		fSubNumBlocks(transaction->sub_num_blocks)
	{
		strlcpy(fLabel, label, sizeof(fLabel));
		Initialized();
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, %s transaction %p (id %" B_PRId32 ")%s"
			", %" B_PRId32 "/%" B_PRId32 " blocks", fCache, fLabel, fTransaction,
			fID, fSub ? " sub" : "", fNumBlocks, fSubNumBlocks);
	}
 
private:
	char				fLabel[12];
	block_cache*		fCache;
	cache_transaction*	fTransaction;
	int32				fID;
	bool				fSub;
	int32				fNumBlocks;
	int32				fSubNumBlocks;
};
 
class Detach : public AbstractTraceEntry {
public:
	Detach(block_cache* cache, cache_transaction* transaction,
			cache_transaction* newTransaction)
		:
		fCache(cache),
		fTransaction(transaction),
		fID(transaction->id),
		fSub(transaction->has_sub_transaction),
		fNewTransaction(newTransaction),
		fNewID(newTransaction->id)
	{
		Initialized();
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, detach transaction %p (id %" B_PRId32 ")"
			"from transaction %p (id %" B_PRId32 ")%s",
			fCache, fNewTransaction, fNewID, fTransaction, fID,
			fSub ? " sub" : "");
	}
 
private:
	block_cache*		fCache;
	cache_transaction*	fTransaction;
	int32				fID;
	bool				fSub;
	cache_transaction*	fNewTransaction;
	int32				fNewID;
};
 
class Abort : public AbstractTraceEntry {
public:
	Abort(block_cache* cache, cache_transaction* transaction)
		:
		fCache(cache),
		fTransaction(transaction),
		fID(transaction->id),
		fNumBlocks(0)
	{
		bool isSub = transaction->has_sub_transaction;
		fNumBlocks = isSub ? transaction->sub_num_blocks
			: transaction->num_blocks;
		fBlocks = (off_t*)alloc_tracing_buffer(fNumBlocks * sizeof(off_t));
		if (fBlocks != NULL) {
			cached_block* block = transaction->first_block;
			for (int32 i = 0; block != NULL && i < fNumBlocks;
					block = block->transaction_next) {
				fBlocks[i++] = block->block_number;
			}
		} else
			fNumBlocks = 0;
 
#if KTRACE_PRINTF_STACK_TRACE
		fStackTrace = capture_tracing_stack_trace(KTRACE_PRINTF_STACK_TRACE, 1,
			false);
#endif
 
		Initialized();
	}
 
	virtual void AddDump(TraceOutput& out)
	{
		out.Print("block cache %p, abort transaction "
			"%p (id %" B_PRId32 "), blocks", fCache, fTransaction, fID);
		for (int32 i = 0; i < fNumBlocks && !out.IsFull(); i++)
			out.Print(" %" B_PRIdOFF, fBlocks[i]);
	}
 
#if KTRACE_PRINTF_STACK_TRACE
	virtual void DumpStackTrace(TraceOutput& out)
	{
		out.PrintStackTrace(fStackTrace);
	}
#endif
 
private:
	block_cache*		fCache;
	cache_transaction*	fTransaction;
	int32				fID;
	off_t*				fBlocks;
	int32				fNumBlocks;
#if KTRACE_PRINTF_STACK_TRACE
	tracing_stack_trace* fStackTrace;
#endif
};
 
}	// namespace TransactionTracing
 
#	define T(x) new(std::nothrow) TransactionTracing::x;
#else
#	define T(x) ;
#endif
 
 
static DoublyLinkedList<block_cache> sCaches;
static mutex sCachesLock = MUTEX_INITIALIZER("block caches");
static mutex sCachesMemoryUseLock
	= MUTEX_INITIALIZER("block caches memory use");
static size_t sUsedMemory;
static sem_id sEventSemaphore;
static mutex sNotificationsLock
	= MUTEX_INITIALIZER("block cache notifications");
static thread_id sNotifierWriterThread;
static DoublyLinkedListLink<block_cache> sMarkCache;
	// TODO: this only works if the link is the first entry of block_cache
static object_cache* sBlockCache;
 
 
//	#pragma mark - notifications/listener
 
 
/*!	Checks whether or not this is an event that closes a transaction. */
static inline bool
is_closing_event(int32 event)
{
	return (event & (TRANSACTION_ABORTED | TRANSACTION_ENDED)) != 0;
}
 
 
static inline bool
is_written_event(int32 event)
{
	return (event & TRANSACTION_WRITTEN) != 0;
}
 
 
/*!	From the specified \a notification, it will remove the lowest pending
	event, and return that one in \a _event.
	If there is no pending event anymore, it will return \c false.
*/
static bool
get_next_pending_event(cache_notification* notification, int32* _event)
{
	for (int32 eventMask = 1; eventMask <= TRANSACTION_IDLE; eventMask <<= 1) {
		int32 pending = atomic_and(&notification->events_pending,
			~eventMask);
 
		bool more = (pending & ~eventMask) != 0;
 
		if ((pending & eventMask) != 0) {
			*_event = eventMask;
			return more;
		}
	}
 
	return false;
}
 
 
static void
flush_pending_notifications(block_cache* cache)
{
	ASSERT_LOCKED_MUTEX(&sCachesLock);
 
	while (true) {
		MutexLocker locker(sNotificationsLock);
 
		cache_notification* notification = cache->pending_notifications.Head();
		if (notification == NULL)
			return;
 
		bool deleteAfterEvent = false;
		int32 event = -1;
		if (!get_next_pending_event(notification, &event)) {
			// remove the notification if this was the last pending event
			cache->pending_notifications.Remove(notification);
			deleteAfterEvent = notification->delete_after_event;
		}
 
		if (event >= 0) {
			// Notify listener, we need to copy the notification, as it might
			// be removed when we unlock the list.
			cache_notification copy = *notification;
			locker.Unlock();
 
			copy.hook(copy.transaction_id, event, copy.data);
 
			locker.Lock();
		}
 
		if (deleteAfterEvent)
			delete notification;
	}
}
 
 
/*!	Flushes all pending notifications by calling the appropriate hook
	functions.
	Must not be called with a cache lock held.
*/
static void
flush_pending_notifications()
{
	MutexLocker _(sCachesLock);
 
	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
	while (iterator.HasNext()) {
		block_cache* cache = iterator.Next();
 
		flush_pending_notifications(cache);
	}
}
 
 
/*!	Initializes the \a notification as specified. */
static void
set_notification(cache_transaction* transaction,
	cache_notification &notification, int32 events,
	transaction_notification_hook hook, void* data)
{
	notification.transaction_id = transaction != NULL ? transaction->id : -1;
	notification.events_pending = 0;
	notification.events = events;
	notification.hook = hook;
	notification.data = data;
	notification.delete_after_event = false;
}
 
 
/*!	Makes sure the notification is deleted. It either deletes it directly,
	when possible, or marks it for deletion if the notification is pending.
*/
static void
delete_notification(cache_notification* notification)
{
	MutexLocker locker(sNotificationsLock);
 
	if (notification->events_pending != 0)
		notification->delete_after_event = true;
	else
		delete notification;
}
 
 
/*!	Adds the notification to the pending notifications list, or, if it's
	already part of it, updates its events_pending field.
	Also marks the notification to be deleted if \a deleteNotification
	is \c true.
	Triggers the notifier thread to run.
*/
static void
add_notification(block_cache* cache, cache_notification* notification,
	int32 event, bool deleteNotification)
{
	if (notification->hook == NULL)
		return;
 
	int32 pending = atomic_or(&notification->events_pending, event);
	if (pending == 0) {
		// not yet part of the notification list
		MutexLocker locker(sNotificationsLock);
		if (deleteNotification)
			notification->delete_after_event = true;
		cache->pending_notifications.Add(notification);
	} else if (deleteNotification) {
		// we might need to delete it ourselves if we're late
		delete_notification(notification);
	}
 
	release_sem_etc(sEventSemaphore, 1, B_DO_NOT_RESCHEDULE);
		// We're probably still holding some locks that makes rescheduling
		// not a good idea at this point.
}
 
 
/*!	Notifies all interested listeners of this transaction about the \a event.
	If \a event is a closing event (ie. TRANSACTION_ENDED, and
	TRANSACTION_ABORTED), all listeners except those listening to
	TRANSACTION_WRITTEN will be removed.
*/
static void
notify_transaction_listeners(block_cache* cache, cache_transaction* transaction,
	int32 event)
{
	T(Action("notify", cache, transaction));
 
	bool isClosing = is_closing_event(event);
	bool isWritten = is_written_event(event);
 
	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
	while (iterator.HasNext()) {
		cache_listener* listener = iterator.Next();
 
		bool remove = (isClosing && !is_written_event(listener->events))
			|| (isWritten && is_written_event(listener->events));
		if (remove)
			iterator.Remove();
 
		if ((listener->events & event) != 0)
			add_notification(cache, listener, event, remove);
		else if (remove)
			delete_notification(listener);
	}
}
 
 
/*!	Removes and deletes all listeners that are still monitoring this
	transaction.
*/
static void
remove_transaction_listeners(block_cache* cache, cache_transaction* transaction)
{
	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
	while (iterator.HasNext()) {
		cache_listener* listener = iterator.Next();
		iterator.Remove();
 
		delete_notification(listener);
	}
}
 
 
static status_t
add_transaction_listener(block_cache* cache, cache_transaction* transaction,
	int32 events, transaction_notification_hook hookFunction, void* data)
{
	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
	while (iterator.HasNext()) {
		cache_listener* listener = iterator.Next();
 
		if (listener->data == data && listener->hook == hookFunction) {
			// this listener already exists, just update it
			listener->events |= events;
			return B_OK;
		}
	}
 
	cache_listener* listener = new(std::nothrow) cache_listener;
	if (listener == NULL)
		return B_NO_MEMORY;
 
	set_notification(transaction, *listener, events, hookFunction, data);
	transaction->listeners.Add(listener);
	return B_OK;
}
 
 
//	#pragma mark - private transaction
 
 
cache_transaction::cache_transaction()
{
	num_blocks = 0;
	main_num_blocks = 0;
	sub_num_blocks = 0;
	first_block = NULL;
	open = true;
	has_sub_transaction = false;
	last_used = system_time();
	busy_writing_count = 0;
}
 
 
static void
delete_transaction(block_cache* cache, cache_transaction* transaction)
{
	if (cache->last_transaction == transaction)
		cache->last_transaction = NULL;
 
	remove_transaction_listeners(cache, transaction);
	delete transaction;
}
 
 
static cache_transaction*
lookup_transaction(block_cache* cache, int32 id)
{
	return cache->transaction_hash->Lookup(id);
}
 
 
size_t TransactionHash::Hash(cache_transaction* transaction) const
{
	return transaction->id;
}
 
 
bool TransactionHash::Compare(int32 key, cache_transaction* transaction) const
{
	return transaction->id == key;
}
 
 
cache_transaction*& TransactionHash::GetLink(cache_transaction* value) const
{
	return value->next;
}
 
 
/*!	Writes back any changes made to blocks in \a transaction that are still
	part of a previous transacton.
*/
static status_t
write_blocks_in_previous_transaction(block_cache* cache,
	cache_transaction* transaction)
{
	BlockWriter writer(cache);
 
	cached_block* block = transaction->first_block;
	for (; block != NULL; block = block->transaction_next) {
		if (block->previous_transaction != NULL) {
			// need to write back pending changes
			writer.Add(block);
		}
	}
 
	return writer.Write();
}
 
 
//	#pragma mark - cached_block
 
 
bool
cached_block::CanBeWritten() const
{
	return !busy_writing && !busy_reading
		&& (previous_transaction != NULL
			|| (transaction == NULL && is_dirty && !is_writing));
}
 
 
//	#pragma mark - BlockWriter
 
 
BlockWriter::BlockWriter(block_cache* cache, size_t max)
	:
	fCache(cache),
	fBlocks(fBuffer),
	fCount(0),
	fTotal(0),
	fCapacity(kBufferSize),
	fMax(max),
	fStatus(B_OK),
	fDeletedTransaction(false)
{
}
 
 
BlockWriter::~BlockWriter()
{
	if (fBlocks != fBuffer)
		free(fBlocks);
}
 
 
/*!	Adds the specified block to the to be written array. If no more blocks can
	be added, false is returned, otherwise true.
*/
bool
BlockWriter::Add(cached_block* block, cache_transaction* transaction)
{
	ASSERT(block->CanBeWritten());
 
	if (fTotal == fMax)
		return false;
 
	if (fCount >= fCapacity) {
		// Enlarge array if necessary
		cached_block** newBlocks;
		size_t newCapacity = max_c(256, fCapacity * 2);
		if (fBlocks == fBuffer)
			newBlocks = (cached_block**)malloc(newCapacity * sizeof(void*));
		else {
			newBlocks = (cached_block**)realloc(fBlocks,
				newCapacity * sizeof(void*));
		}
 
		if (newBlocks == NULL) {
			// Allocating a larger array failed - we need to write back what
			// we have synchronously now (this will also clear the array)
			Write(transaction, false);
		} else {
			if (fBlocks == fBuffer)
				memcpy(newBlocks, fBuffer, kBufferSize * sizeof(void*));
 
			fBlocks = newBlocks;
			fCapacity = newCapacity;
		}
	}
 
	fBlocks[fCount++] = block;
	fTotal++;
	block->busy_writing = true;
	fCache->busy_writing_count++;
	if (block->previous_transaction != NULL)
		block->previous_transaction->busy_writing_count++;
 
	return true;
}
 
 
/*!	Adds all blocks of the specified transaction to the to be written array.
	If no more blocks can be added, false is returned, otherwise true.
*/
bool
BlockWriter::Add(cache_transaction* transaction, bool& hasLeftOvers)
{
	ASSERT(!transaction->open);
 
	if (transaction->busy_writing_count != 0) {
		hasLeftOvers = true;
		return true;
	}
 
	hasLeftOvers = false;
 
	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
	while (cached_block* block = blockIterator.Next()) {
		if (!block->CanBeWritten()) {
			// This block was already part of a previous transaction within this
			// writer
			hasLeftOvers = true;
			continue;
		}
		if (!Add(block, transaction))
			return false;
 
		if (DeletedTransaction())
			break;
	}
 
	return true;
}
 
 
/*! Cache must be locked when calling this method, but it will be unlocked
	while the blocks are written back.
*/
status_t
BlockWriter::Write(cache_transaction* transaction, bool canUnlock)
{
	if (fCount == 0)
		return B_OK;
 
	if (canUnlock)
		mutex_unlock(&fCache->lock);
 
	// Sort blocks in their on-disk order
	// TODO: ideally, this should be handled by the I/O scheduler
 
	qsort(fBlocks, fCount, sizeof(void*), &_CompareBlocks);
	fDeletedTransaction = false;
 
	for (uint32 i = 0; i < fCount; i++) {
		status_t status = _WriteBlock(fBlocks[i]);
		if (status != B_OK) {
			// propagate to global error handling
			if (fStatus == B_OK)
				fStatus = status;
 
			_UnmarkWriting(fBlocks[i]);
			fBlocks[i] = NULL;
				// This block will not be marked clean
		}
	}
 
	if (canUnlock)
		mutex_lock(&fCache->lock);
 
	for (uint32 i = 0; i < fCount; i++)
		_BlockDone(fBlocks[i], transaction);
 
	fCount = 0;
	return fStatus;
}
 
 
/*!	Writes the specified \a block back to disk. It will always only write back
	the oldest change of the block if it is part of more than one transaction.
	It will automatically send out TRANSACTION_WRITTEN notices, as well as
	delete transactions when they are no longer used, and \a deleteTransaction
	is \c true.
*/
/*static*/ status_t
BlockWriter::WriteBlock(block_cache* cache, cached_block* block)
{
	BlockWriter writer(cache);
 
	writer.Add(block);
	return writer.Write();
}
 
 
void*
BlockWriter::_Data(cached_block* block) const
{
	return block->previous_transaction != NULL && block->original_data != NULL
		? block->original_data : block->current_data;
		// We first need to write back changes from previous transactions
}
 
 
status_t
BlockWriter::_WriteBlock(cached_block* block)
{
	ASSERT(block->busy_writing);
 
	TRACE(("BlockWriter::_WriteBlock(block %" B_PRIdOFF ")\n", block->block_number));
	TB(Write(fCache, block));
	TB2(BlockData(fCache, block, "before write"));
 
	size_t blockSize = fCache->block_size;
 
	ssize_t written = write_pos(fCache->fd,
		block->block_number * blockSize, _Data(block), blockSize);
 
	if (written != (ssize_t)blockSize) {
		TB(Error(fCache, block->block_number, "write failed", written));
		TRACE_ALWAYS(("could not write back block %" B_PRIdOFF " (%s)\n", block->block_number,
			strerror(errno)));
		if (written < 0)
			return errno;
 
		return B_IO_ERROR;
	}
 
	return B_OK;
}
 
 
void
BlockWriter::_BlockDone(cached_block* block,
	cache_transaction* transaction)
{
	if (block == NULL) {
		// An error occured when trying to write this block
		return;
	}
 
	if (fCache->num_dirty_blocks > 0)
		fCache->num_dirty_blocks--;
 
	if (_Data(block) == block->current_data)
		block->is_dirty = false;
 
	_UnmarkWriting(block);
 
	cache_transaction* previous = block->previous_transaction;
	if (previous != NULL) {
		previous->blocks.Remove(block);
		block->previous_transaction = NULL;
 
		if (block->original_data != NULL && block->transaction == NULL) {
			// This block is not part of a transaction, so it does not need
			// its original pointer anymore.
			fCache->Free(block->original_data);
			block->original_data = NULL;
		}
 
		// Has the previous transaction been finished with that write?
		if (--previous->num_blocks == 0) {
			TRACE(("cache transaction %" B_PRId32 " finished!\n", previous->id));
			T(Action("written", fCache, previous));
 
			notify_transaction_listeners(fCache, previous,
				TRANSACTION_WRITTEN);
 
			if (transaction != NULL) {
				// This function is called while iterating transaction_hash. We
				// use RemoveUnchecked so the iterator is still valid. A regular
				// Remove can trigger a resize of the hash table which would
				// result in the linked items in the table changing order.
				fCache->transaction_hash->RemoveUnchecked(transaction);
			} else
				fCache->transaction_hash->Remove(previous);
 
			delete_transaction(fCache, previous);
			fDeletedTransaction = true;
		}
	}
	if (block->transaction == NULL && block->ref_count == 0 && !block->unused) {
		// the block is no longer used
		ASSERT(block->original_data == NULL && block->parent_data == NULL);
		block->unused = true;
		fCache->unused_blocks.Add(block);
		fCache->unused_block_count++;
	}
 
	TB2(BlockData(fCache, block, "after write"));
}
 
 
void
BlockWriter::_UnmarkWriting(cached_block* block)
{
	block->busy_writing = false;
	if (block->previous_transaction != NULL)
		block->previous_transaction->busy_writing_count--;
	fCache->busy_writing_count--;
 
	if ((fCache->busy_writing_waiters && fCache->busy_writing_count == 0)
		|| block->busy_writing_waiters) {
		fCache->busy_writing_waiters = false;
		block->busy_writing_waiters = false;
		fCache->busy_writing_condition.NotifyAll();
	}
}
 
 
/*static*/ int
BlockWriter::_CompareBlocks(const void* _blockA, const void* _blockB)
{
	cached_block* blockA = *(cached_block**)_blockA;
	cached_block* blockB = *(cached_block**)_blockB;
 
	off_t diff = blockA->block_number - blockB->block_number;
	if (diff > 0)
		return 1;
 
	return diff < 0 ? -1 : 0;
}
 
 
//	#pragma mark - block_cache
 
 
block_cache::block_cache(int _fd, off_t numBlocks, size_t blockSize,
		bool readOnly)
	:
	hash(NULL),
	fd(_fd),
	max_blocks(numBlocks),
	block_size(blockSize),
	next_transaction_id(1),
	last_transaction(NULL),
	transaction_hash(NULL),
	buffer_cache(NULL),
	unused_block_count(0),
	busy_reading_count(0),
	busy_reading_waiters(false),
	busy_writing_count(0),
	busy_writing_waiters(0),
	num_dirty_blocks(0),
	read_only(readOnly)
{
}
 
 
/*! Should be called with the cache's lock held. */
block_cache::~block_cache()
{
	unregister_low_resource_handler(&_LowMemoryHandler, this);
 
	delete transaction_hash;
	delete hash;
 
	delete_object_cache(buffer_cache);
 
	mutex_destroy(&lock);
}
 
 
status_t
block_cache::Init()
{
	busy_reading_condition.Init(this, "cache block busy_reading");
	busy_writing_condition.Init(this, "cache block busy writing");
	condition_variable.Init(this, "cache transaction sync");
	mutex_init(&lock, "block cache");
 
	buffer_cache = create_object_cache_etc("block cache buffers", block_size,
		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
	if (buffer_cache == NULL)
		return B_NO_MEMORY;
 
	hash = new BlockTable();
	if (hash == NULL || hash->Init(1024) != B_OK)
		return B_NO_MEMORY;
 
	transaction_hash = new(std::nothrow) TransactionTable();
	if (transaction_hash == NULL || transaction_hash->Init(16) != B_OK)
		return B_NO_MEMORY;
 
	return register_low_resource_handler(&_LowMemoryHandler, this,
		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE, 0);
}
 
 
void
block_cache::Free(void* buffer)
{
	if (buffer != NULL)
		object_cache_free(buffer_cache, buffer, 0);
}
 
 
void*
block_cache::Allocate()
{
	void* block = object_cache_alloc(buffer_cache, 0);
	if (block != NULL)
		return block;
 
	// recycle existing before allocating a new one
	RemoveUnusedBlocks(100);
 
	return object_cache_alloc(buffer_cache, 0);
}
 
 
void
block_cache::FreeBlock(cached_block* block)
{
	Free(block->current_data);
 
	if (block->original_data != NULL || block->parent_data != NULL) {
		panic("block_cache::FreeBlock(): %" B_PRIdOFF ", original %p, parent %p\n",
			block->block_number, block->original_data, block->parent_data);
	}
 
#if BLOCK_CACHE_DEBUG_CHANGED
	Free(block->compare);
#endif
 
	object_cache_free(sBlockCache, block, 0);
}
 
 
/*! Allocates a new block for \a blockNumber, ready for use */
cached_block*
block_cache::NewBlock(off_t blockNumber)
{
	cached_block* block = NULL;
 
	if (low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE) != B_NO_LOW_RESOURCE) {
		// recycle existing instead of allocating a new one
		block = _GetUnusedBlock();
	}
	if (block == NULL) {
		block = (cached_block*)object_cache_alloc(sBlockCache, 0);
		if (block != NULL) {
			block->current_data = Allocate();
			if (block->current_data == NULL) {
				object_cache_free(sBlockCache, block, 0);
				return NULL;
			}
		} else {
			TB(Error(this, blockNumber, "allocation failed"));
			dprintf("block allocation failed, unused list is %sempty.\n",
				unused_blocks.IsEmpty() ? "" : "not ");
 
			// allocation failed, try to reuse an unused block
			block = _GetUnusedBlock();
			if (block == NULL) {
				TB(Error(this, blockNumber, "get unused failed"));
				FATAL(("could not allocate block!\n"));
				return NULL;
			}
		}
	}
 
	block->block_number = blockNumber;
	block->ref_count = 0;
	block->last_accessed = 0;
	block->transaction_next = NULL;
	block->transaction = block->previous_transaction = NULL;
	block->original_data = NULL;
	block->parent_data = NULL;
	block->busy_reading = false;
	block->busy_writing = false;
	block->is_writing = false;
	block->is_dirty = false;
	block->unused = false;
	block->discard = false;
	block->busy_reading_waiters = false;
	block->busy_writing_waiters = false;
#if BLOCK_CACHE_DEBUG_CHANGED
	block->compare = NULL;
#endif
 
	return block;
}
 
 
void
block_cache::FreeBlockParentData(cached_block* block)
{
	ASSERT(block->parent_data != NULL);
	if (block->parent_data != block->current_data)
		Free(block->parent_data);
	block->parent_data = NULL;
}
 
 
void
block_cache::RemoveUnusedBlocks(int32 count, int32 minSecondsOld)
{
	TRACE(("block_cache: remove up to %" B_PRId32 " unused blocks\n", count));
 
	for (block_list::Iterator iterator = unused_blocks.GetIterator();
			cached_block* block = iterator.Next();) {
		if (minSecondsOld >= block->LastAccess()) {
			// The list is sorted by last access
			break;
		}
		if (block->busy_reading || block->busy_writing)
			continue;
 
		TB(Flush(this, block));
		TRACE(("  remove block %" B_PRIdOFF ", last accessed %" B_PRId32 "\n",
			block->block_number, block->last_accessed));
 
		// this can only happen if no transactions are used
		if (block->is_dirty && !block->discard) {
			if (block->busy_writing)
				continue;
 
			BlockWriter::WriteBlock(this, block);
		}
 
		// remove block from lists
		iterator.Remove();
		unused_block_count--;
		RemoveBlock(block);
 
		if (--count <= 0)
			break;
	}
}
 
 
void
block_cache::RemoveBlock(cached_block* block)
{
	hash->Remove(block);
	FreeBlock(block);
}
 
 
/*!	Discards the block from a transaction (this method must not be called
	for blocks not part of a transaction).
*/
void
block_cache::DiscardBlock(cached_block* block)
{
	ASSERT(block->discard);
	ASSERT(block->previous_transaction == NULL);
 
	if (block->parent_data != NULL)
		FreeBlockParentData(block);
 
	if (block->original_data != NULL) {
		Free(block->original_data);
		block->original_data = NULL;
	}
 
	RemoveBlock(block);
}
 
 
void
block_cache::_LowMemoryHandler(void* data, uint32 resources, int32 level)
{
	TRACE(("block_cache: low memory handler called with level %" B_PRId32 "\n", level));
 
	// free some blocks according to the low memory state
	// (if there is enough memory left, we don't free any)
 
	block_cache* cache = (block_cache*)data;
	int32 free = 0;
	int32 secondsOld = 0;
	switch (level) {
		case B_NO_LOW_RESOURCE:
			return;
		case B_LOW_RESOURCE_NOTE:
			free = cache->unused_block_count / 8;
			secondsOld = 120;
			break;
		case B_LOW_RESOURCE_WARNING:
			free = cache->unused_block_count / 4;
			secondsOld = 10;
			break;
		case B_LOW_RESOURCE_CRITICAL:
			free = cache->unused_block_count / 2;
			secondsOld = 0;
			break;
	}
 
	MutexLocker locker(&cache->lock);
 
	if (!locker.IsLocked()) {
		// If our block_cache were deleted, it could be that we had
		// been called before that deletion went through, therefore,
		// acquiring its lock might fail.
		return;
	}
 
#ifdef TRACE_BLOCK_CACHE
	uint32 oldUnused = cache->unused_block_count;
#endif
 
	cache->RemoveUnusedBlocks(free, secondsOld);
 
	TRACE(("block_cache::_LowMemoryHandler(): %p: unused: %" B_PRIu32 " -> %" B_PRIu32 "\n",
		cache, oldUnused, cache->unused_block_count));
}
 
 
cached_block*
block_cache::_GetUnusedBlock()
{
	TRACE(("block_cache: get unused block\n"));
 
	for (block_list::Iterator iterator = unused_blocks.GetIterator();
			cached_block* block = iterator.Next();) {
		TB(Flush(this, block, true));
		// this can only happen if no transactions are used
		if (block->is_dirty && !block->busy_writing && !block->discard)
			BlockWriter::WriteBlock(this, block);
 
		// remove block from lists
		iterator.Remove();
		unused_block_count--;
		hash->Remove(block);
 
		ASSERT(block->original_data == NULL && block->parent_data == NULL);
		block->unused = false;
 
		// TODO: see if compare data is handled correctly here!
#if BLOCK_CACHE_DEBUG_CHANGED
		if (block->compare != NULL)
			Free(block->compare);
#endif
		return block;
	}
 
	return NULL;
}
 
 
//	#pragma mark - private block functions
 
 
/*!	Cache must be locked.
*/
static void
mark_block_busy_reading(block_cache* cache, cached_block* block)
{
	block->busy_reading = true;
	cache->busy_reading_count++;
}
 
 
/*!	Cache must be locked.
*/
static void
mark_block_unbusy_reading(block_cache* cache, cached_block* block)
{
	block->busy_reading = false;
	cache->busy_reading_count--;
 
	if ((cache->busy_reading_waiters && cache->busy_reading_count == 0)
		|| block->busy_reading_waiters) {
		cache->busy_reading_waiters = false;
		block->busy_reading_waiters = false;
		cache->busy_reading_condition.NotifyAll();
	}
}
 
 
/*!	Cache must be locked.
*/
static void
wait_for_busy_reading_block(block_cache* cache, cached_block* block)
{
	while (block->busy_reading) {
		// wait for at least the specified block to be read in
		ConditionVariableEntry entry;
		cache->busy_reading_condition.Add(&entry);
		block->busy_reading_waiters = true;
 
		mutex_unlock(&cache->lock);
 
		entry.Wait();
 
		mutex_lock(&cache->lock);
	}
}
 
 
/*!	Cache must be locked.
*/
static void
wait_for_busy_reading_blocks(block_cache* cache)
{
	while (cache->busy_reading_count != 0) {
		// wait for all blocks to be read in
		ConditionVariableEntry entry;
		cache->busy_reading_condition.Add(&entry);
		cache->busy_reading_waiters = true;
 
		mutex_unlock(&cache->lock);
 
		entry.Wait();
 
		mutex_lock(&cache->lock);
	}
}
 
 
/*!	Cache must be locked.
*/
static void
wait_for_busy_writing_block(block_cache* cache, cached_block* block)
{
	while (block->busy_writing) {
		// wait for all blocks to be written back
		ConditionVariableEntry entry;
		cache->busy_writing_condition.Add(&entry);
		block->busy_writing_waiters = true;
 
		mutex_unlock(&cache->lock);
 
		entry.Wait();
 
		mutex_lock(&cache->lock);
	}
}
 
 
/*!	Cache must be locked.
*/
static void
wait_for_busy_writing_blocks(block_cache* cache)
{
	while (cache->busy_writing_count != 0) {
		// wait for all blocks to be written back
		ConditionVariableEntry entry;
		cache->busy_writing_condition.Add(&entry);
		cache->busy_writing_waiters = true;
 
		mutex_unlock(&cache->lock);
 
		entry.Wait();
 
		mutex_lock(&cache->lock);
	}
}
 
 
/*!	Removes a reference from the specified \a block. If this was the last
	reference, the block is moved into the unused list.
	In low memory situations, it will also free some blocks from that list,
	but not necessarily the \a block it just released.
*/
static void
put_cached_block(block_cache* cache, cached_block* block)
{
#if BLOCK_CACHE_DEBUG_CHANGED
	if (!block->is_dirty && block->compare != NULL
		&& memcmp(block->current_data, block->compare, cache->block_size)) {
		dprintf("new block:\n");
		dump_block((const char*)block->current_data, 256, "  ");
		dprintf("unchanged block:\n");
		dump_block((const char*)block->compare, 256, "  ");
		BlockWriter::WriteBlock(cache, block);
		panic("block_cache: supposed to be clean block was changed!\n");
 
		cache->Free(block->compare);
		block->compare = NULL;
	}
#endif
	TB(Put(cache, block));
 
	if (block->ref_count < 1) {
		panic("Invalid ref_count for block %p, cache %p\n", block, cache);
		return;
	}
 
	if (--block->ref_count == 0
		&& block->transaction == NULL && block->previous_transaction == NULL) {
		// This block is not used anymore, and not part of any transaction
		block->is_writing = false;
 
		if (block->discard) {
			cache->RemoveBlock(block);
		} else {
			// put this block in the list of unused blocks
			ASSERT(!block->unused);
			block->unused = true;
 
			ASSERT(block->original_data == NULL && block->parent_data == NULL);
			cache->unused_blocks.Add(block);
			cache->unused_block_count++;
		}
	}
}
 
 
static void
put_cached_block(block_cache* cache, off_t blockNumber)
{
	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
		panic("put_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
			blockNumber, cache->max_blocks - 1);
	}
 
	cached_block* block = cache->hash->Lookup(blockNumber);
	if (block != NULL)
		put_cached_block(cache, block);
	else {
		TB(Error(cache, blockNumber, "put unknown"));
	}
}
 
 
/*!	Retrieves the block \a blockNumber from the hash table, if it's already
	there, or reads it from the disk.
	You need to have the cache locked when calling this function.
 
	\param _allocated tells you whether or not a new block has been allocated
		to satisfy your request.
	\param readBlock if \c false, the block will not be read in case it was
		not already in the cache. The block you retrieve may contain random
		data. If \c true, the cache will be temporarily unlocked while the
		block is read in.
*/
static cached_block*
get_cached_block(block_cache* cache, off_t blockNumber, bool* _allocated,
	bool readBlock = true)
{
	ASSERT_LOCKED_MUTEX(&cache->lock);
 
	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
		panic("get_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
			blockNumber, cache->max_blocks - 1);
		return NULL;
	}
 
retry:
	cached_block* block = cache->hash->Lookup(blockNumber);
	*_allocated = false;
 
	if (block == NULL) {
		// put block into cache
		block = cache->NewBlock(blockNumber);
		if (block == NULL)
			return NULL;
 
		cache->hash->Insert(block);
		*_allocated = true;
	} else if (block->busy_reading) {
		// The block is currently busy_reading - wait and try again later
		wait_for_busy_reading_block(cache, block);
		goto retry;
	}
 
	if (block->unused) {
		//TRACE(("remove block %" B_PRIdOFF " from unused\n", blockNumber));
		block->unused = false;
		cache->unused_blocks.Remove(block);
		cache->unused_block_count--;
	}
 
	if (*_allocated && readBlock) {
		// read block into cache
		int32 blockSize = cache->block_size;
 
		mark_block_busy_reading(cache, block);
		mutex_unlock(&cache->lock);
 
		ssize_t bytesRead = read_pos(cache->fd, blockNumber * blockSize,
			block->current_data, blockSize);
 
		mutex_lock(&cache->lock);
		if (bytesRead < blockSize) {
			cache->RemoveBlock(block);
			TB(Error(cache, blockNumber, "read failed", bytesRead));
 
			TRACE_ALWAYS(("could not read block %" B_PRIdOFF ": bytesRead: %zd, error: %s\n",
				blockNumber, bytesRead, strerror(errno)));
			return NULL;
		}
		TB(Read(cache, block));
 
		mark_block_unbusy_reading(cache, block);
	}
 
	block->ref_count++;
	block->last_accessed = system_time() / 1000000L;
 
	return block;
}
 
 
/*!	Returns the writable block data for the requested blockNumber.
	If \a cleared is true, the block is not read from disk; an empty block
	is returned.
 
	This is the only method to insert a block into a transaction. It makes
	sure that the previous block contents are preserved in that case.
*/
static void*
get_writable_cached_block(block_cache* cache, off_t blockNumber, off_t base,
	off_t length, int32 transactionID, bool cleared)
{
	TRACE(("get_writable_cached_block(blockNumber = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
		blockNumber, transactionID));
 
	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
		panic("get_writable_cached_block: invalid block number %" B_PRIdOFF " (max %" B_PRIdOFF ")",
			blockNumber, cache->max_blocks - 1);
	}
 
	bool allocated;
	cached_block* block = get_cached_block(cache, blockNumber, &allocated,
		!cleared);
	if (block == NULL)
		return NULL;
 
	if (block->busy_writing)
		wait_for_busy_writing_block(cache, block);
 
	block->discard = false;
 
	// if there is no transaction support, we just return the current block
	if (transactionID == -1) {
		if (cleared) {
			mark_block_busy_reading(cache, block);
			mutex_unlock(&cache->lock);
 
			memset(block->current_data, 0, cache->block_size);
 
			mutex_lock(&cache->lock);
			mark_block_unbusy_reading(cache, block);
		}
 
		block->is_writing = true;
 
		if (!block->is_dirty) {
			cache->num_dirty_blocks++;
			block->is_dirty = true;
				// mark the block as dirty
		}
 
		TB(Get(cache, block));
		return block->current_data;
	}
 
	cache_transaction* transaction = block->transaction;
 
	if (transaction != NULL && transaction->id != transactionID) {
		// TODO: we have to wait here until the other transaction is done.
		//	Maybe we should even panic, since we can't prevent any deadlocks.
		panic("get_writable_cached_block(): asked to get busy writable block "
			"(transaction %" B_PRId32 ")\n", block->transaction->id);
		put_cached_block(cache, block);
		return NULL;
	}
	if (transaction == NULL && transactionID != -1) {
		// get new transaction
		transaction = lookup_transaction(cache, transactionID);
		if (transaction == NULL) {
			panic("get_writable_cached_block(): invalid transaction %" B_PRId32 "!\n",
				transactionID);
			put_cached_block(cache, block);
			return NULL;
		}
		if (!transaction->open) {
			panic("get_writable_cached_block(): transaction already done!\n");
			put_cached_block(cache, block);
			return NULL;
		}
 
		block->transaction = transaction;
 
		// attach the block to the transaction block list
		block->transaction_next = transaction->first_block;
		transaction->first_block = block;
		transaction->num_blocks++;
	}
	if (transaction != NULL)
		transaction->last_used = system_time();
 
	bool wasUnchanged = block->original_data == NULL
		|| block->previous_transaction != NULL;
 
	if (!(allocated && cleared) && block->original_data == NULL) {
		// we already have data, so we need to preserve it
		block->original_data = cache->Allocate();
		if (block->original_data == NULL) {
			TB(Error(cache, blockNumber, "allocate original failed"));
			FATAL(("could not allocate original_data\n"));
			put_cached_block(cache, block);
			return NULL;
		}
 
		mark_block_busy_reading(cache, block);
		mutex_unlock(&cache->lock);
 
		memcpy(block->original_data, block->current_data, cache->block_size);
 
		mutex_lock(&cache->lock);
		mark_block_unbusy_reading(cache, block);
	}
	if (block->parent_data == block->current_data) {
		// remember any previous contents for the parent transaction
		block->parent_data = cache->Allocate();
		if (block->parent_data == NULL) {
			// TODO: maybe we should just continue the current transaction in
			// this case...
			TB(Error(cache, blockNumber, "allocate parent failed"));
			FATAL(("could not allocate parent\n"));
			put_cached_block(cache, block);
			return NULL;
		}
 
		mark_block_busy_reading(cache, block);
		mutex_unlock(&cache->lock);
 
		memcpy(block->parent_data, block->current_data, cache->block_size);
 
		mutex_lock(&cache->lock);
		mark_block_unbusy_reading(cache, block);
 
		transaction->sub_num_blocks++;
	} else if (transaction != NULL && transaction->has_sub_transaction
		&& block->parent_data == NULL && wasUnchanged)
		transaction->sub_num_blocks++;
 
	if (cleared) {
		mark_block_busy_reading(cache, block);
		mutex_unlock(&cache->lock);
 
		memset(block->current_data, 0, cache->block_size);
 
		mutex_lock(&cache->lock);
		mark_block_unbusy_reading(cache, block);
	}
 
	block->is_dirty = true;
	TB(Get(cache, block));
	TB2(BlockData(cache, block, "get writable"));
 
	return block->current_data;
}
 
 
#if DEBUG_BLOCK_CACHE
 
 
static void
dump_block(cached_block* block)
{
	kprintf("%08lx %9" B_PRIdOFF " %08lx %08lx %08lx %5" B_PRId32 " %6" B_PRId32
		" %c%c%c%c%c%c %08lx %08lx\n",
		(addr_t)block, block->block_number,
		(addr_t)block->current_data, (addr_t)block->original_data,
		(addr_t)block->parent_data, block->ref_count, block->LastAccess(),
		block->busy_reading ? 'r' : '-', block->busy_writing ? 'w' : '-',
		block->is_writing ? 'W' : '-', block->is_dirty ? 'D' : '-',
		block->unused ? 'U' : '-', block->discard ? 'D' : '-',
		(addr_t)block->transaction,
		(addr_t)block->previous_transaction);
}
 
 
static void
dump_block_long(cached_block* block)
{
	kprintf("BLOCK %p\n", block);
	kprintf(" current data:  %p\n", block->current_data);
	kprintf(" original data: %p\n", block->original_data);
	kprintf(" parent data:   %p\n", block->parent_data);
#if BLOCK_CACHE_DEBUG_CHANGED
	kprintf(" compare data:  %p\n", block->compare);
#endif
	kprintf(" ref_count:     %" B_PRId32 "\n", block->ref_count);
	kprintf(" accessed:      %" B_PRId32 "\n", block->LastAccess());
	kprintf(" flags:        ");
	if (block->busy_reading)
		kprintf(" busy_reading");
	if (block->busy_writing)
		kprintf(" busy_writing");
	if (block->is_writing)
		kprintf(" is-writing");
	if (block->is_dirty)
		kprintf(" is-dirty");
	if (block->unused)
		kprintf(" unused");
	if (block->discard)
		kprintf(" discard");
	kprintf("\n");
	if (block->transaction != NULL) {
		kprintf(" transaction:   %p (%" B_PRId32 ")\n", block->transaction,
			block->transaction->id);
		if (block->transaction_next != NULL) {
			kprintf(" next in transaction: %" B_PRIdOFF "\n",
				block->transaction_next->block_number);
		}
	}
	if (block->previous_transaction != NULL) {
		kprintf(" previous transaction: %p (%" B_PRId32 ")\n",
			block->previous_transaction,
			block->previous_transaction->id);
	}
 
	set_debug_variable("_current", (addr_t)block->current_data);
	set_debug_variable("_original", (addr_t)block->original_data);
	set_debug_variable("_parent", (addr_t)block->parent_data);
}
 
 
static int
dump_cached_block(int argc, char** argv)
{
	if (argc != 2) {
		kprintf("usage: %s <block-address>\n", argv[0]);
		return 0;
	}
 
	dump_block_long((struct cached_block*)(addr_t)parse_expression(argv[1]));
	return 0;
}
 
 
static int
dump_cache(int argc, char** argv)
{
	bool showTransactions = false;
	bool showBlocks = false;
	int32 i = 1;
	while (argv[i] != NULL && argv[i][0] == '-') {
		for (char* arg = &argv[i][1]; arg[0]; arg++) {
			switch (arg[0]) {
				case 'b':
					showBlocks = true;
					break;
				case 't':
					showTransactions = true;
					break;
				default:
					print_debugger_command_usage(argv[0]);
					return 0;
			}
		}
		i++;
	}
 
	if (i >= argc) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}
 
	block_cache* cache = (struct block_cache*)(addr_t)parse_expression(argv[i]);
	if (cache == NULL) {
		kprintf("invalid cache address\n");
		return 0;
	}
 
	off_t blockNumber = -1;
	if (i + 1 < argc) {
		blockNumber = parse_expression(argv[i + 1]);
		cached_block* block = cache->hash->Lookup(blockNumber);
		if (block != NULL)
			dump_block_long(block);
		else
			kprintf("block %" B_PRIdOFF " not found\n", blockNumber);
		return 0;
	}
 
	kprintf("BLOCK CACHE: %p\n", cache);
 
	kprintf(" fd:           %d\n", cache->fd);
	kprintf(" max_blocks:   %" B_PRIdOFF "\n", cache->max_blocks);
	kprintf(" block_size:   %zu\n", cache->block_size);
	kprintf(" next_transaction_id: %" B_PRId32 "\n", cache->next_transaction_id);
	kprintf(" buffer_cache: %p\n", cache->buffer_cache);
	kprintf(" busy_reading: %" B_PRIu32 ", %s waiters\n", cache->busy_reading_count,
		cache->busy_reading_waiters ? "has" : "no");
	kprintf(" busy_writing: %" B_PRIu32 ", %s waiters\n", cache->busy_writing_count,
		cache->busy_writing_waiters ? "has" : "no");
 
	if (!cache->pending_notifications.IsEmpty()) {
		kprintf(" pending notifications:\n");
 
		NotificationList::Iterator iterator
			= cache->pending_notifications.GetIterator();
		while (iterator.HasNext()) {
			cache_notification* notification = iterator.Next();
 
			kprintf("  %p %5" B_PRIx32 " %p - %p\n", notification,
				notification->events_pending, notification->hook,
				notification->data);
		}
	}
 
	if (showTransactions) {
		kprintf(" transactions:\n");
		kprintf("address       id state  blocks  main   sub\n");
 
		TransactionTable::Iterator iterator(cache->transaction_hash);
 
		while (iterator.HasNext()) {
			cache_transaction* transaction = iterator.Next();
			kprintf("%p %5" B_PRId32 " %-7s %5" B_PRId32 " %5" B_PRId32 " %5"
				B_PRId32 "\n", transaction, transaction->id,
				transaction->open ? "open" : "closed",
				transaction->num_blocks, transaction->main_num_blocks,
				transaction->sub_num_blocks);
		}
	}
 
	if (showBlocks) {
		kprintf(" blocks:\n");
		kprintf("address  block no. current  original parent    refs access "
			"flags transact prev. trans\n");
	}
 
	uint32 referenced = 0;
	uint32 count = 0;
	uint32 dirty = 0;
	uint32 discarded = 0;
	BlockTable::Iterator iterator(cache->hash);
	while (iterator.HasNext()) {
		cached_block* block = iterator.Next();
		if (showBlocks)
			dump_block(block);
 
		if (block->is_dirty)
			dirty++;
		if (block->discard)
			discarded++;
		if (block->ref_count)
			referenced++;
		count++;
	}
 
	kprintf(" %" B_PRIu32 " blocks total, %" B_PRIu32 " dirty, %" B_PRIu32
		" discarded, %" B_PRIu32 " referenced, %" B_PRIu32 " busy, %" B_PRIu32
		" in unused.\n",
		count, dirty, discarded, referenced, cache->busy_reading_count,
		cache->unused_block_count);
	return 0;
}
 
 
static int
dump_transaction(int argc, char** argv)
{
	bool showBlocks = false;
	int i = 1;
	if (argc > 1 && !strcmp(argv[1], "-b")) {
		showBlocks = true;
		i++;
	}
 
	if (argc - i < 1 || argc - i > 2) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}
 
	cache_transaction* transaction = NULL;
 
	if (argc - i == 1) {
		transaction = (cache_transaction*)(addr_t)parse_expression(argv[i]);
	} else {
		block_cache* cache = (block_cache*)(addr_t)parse_expression(argv[i]);
		int32 id = parse_expression(argv[i + 1]);
		transaction = lookup_transaction(cache, id);
		if (transaction == NULL) {
			kprintf("No transaction with ID %" B_PRId32 " found.\n", id);
			return 0;
		}
	}
 
	kprintf("TRANSACTION %p\n", transaction);
 
	kprintf(" id:             %" B_PRId32 "\n", transaction->id);
	kprintf(" num block:      %" B_PRId32 "\n", transaction->num_blocks);
	kprintf(" main num block: %" B_PRId32 "\n", transaction->main_num_blocks);
	kprintf(" sub num block:  %" B_PRId32 "\n", transaction->sub_num_blocks);
	kprintf(" has sub:        %d\n", transaction->has_sub_transaction);
	kprintf(" state:          %s\n", transaction->open ? "open" : "closed");
	kprintf(" idle:           %" B_PRId64 " secs\n",
		(system_time() - transaction->last_used) / 1000000);
 
	kprintf(" listeners:\n");
 
	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
	while (iterator.HasNext()) {
		cache_listener* listener = iterator.Next();
 
		kprintf("  %p %5" B_PRIx32 " %p - %p\n", listener, listener->events_pending,
			listener->hook, listener->data);
	}
 
	if (!showBlocks)
		return 0;
 
	kprintf(" blocks:\n");
	kprintf("address  block no. current  original parent    refs access "
		"flags transact prev. trans\n");
 
	cached_block* block = transaction->first_block;
	while (block != NULL) {
		dump_block(block);
		block = block->transaction_next;
	}
 
	kprintf("--\n");
 
	block_list::Iterator blockIterator = transaction->blocks.GetIterator();
	while (blockIterator.HasNext()) {
		block = blockIterator.Next();
		dump_block(block);
	}
 
	return 0;
}
 
 
static int
dump_caches(int argc, char** argv)
{
	kprintf("Block caches:\n");
	DoublyLinkedList<block_cache>::Iterator i = sCaches.GetIterator();
	while (i.HasNext()) {
		block_cache* cache = i.Next();
		if (cache == (block_cache*)&sMarkCache)
			continue;
 
		kprintf("  %p\n", cache);
	}
 
	return 0;
}
 
 
#if BLOCK_CACHE_BLOCK_TRACING >= 2
static int
dump_block_data(int argc, char** argv)
{
	using namespace BlockTracing;
 
	// Determine which blocks to show
 
	bool printStackTrace = true;
	uint32 which = 0;
	int32 i = 1;
	while (i < argc && argv[i][0] == '-') {
		char* arg = &argv[i][1];
		while (arg[0]) {
			switch (arg[0]) {
				case 'c':
					which |= BlockData::kCurrent;
					break;
				case 'p':
					which |= BlockData::kParent;
					break;
				case 'o':
					which |= BlockData::kOriginal;
					break;
 
				default:
					kprintf("invalid block specifier (only o/c/p are "
						"allowed).\n");
					return 0;
			}
			arg++;
		}
 
		i++;
	}
	if (which == 0)
		which = BlockData::kCurrent | BlockData::kParent | BlockData::kOriginal;
 
	if (i == argc) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}
 
	// Get the range of blocks to print
 
	int64 from = parse_expression(argv[i]);
	int64 to = from;
	if (argc > i + 1)
		to = parse_expression(argv[i + 1]);
	if (to < from)
		to = from;
 
	uint32 offset = 0;
	uint32 size = LONG_MAX;
	if (argc > i + 2)
		offset = parse_expression(argv[i + 2]);
	if (argc > i + 3)
		size = parse_expression(argv[i + 3]);
 
	TraceEntryIterator iterator;
	iterator.MoveTo(from - 1);
 
	static char sBuffer[1024];
	LazyTraceOutput out(sBuffer, sizeof(sBuffer), TRACE_OUTPUT_TEAM_ID);
 
	while (TraceEntry* entry = iterator.Next()) {
		int32 index = iterator.Index();
		if (index > to)
			break;
 
		Action* action = dynamic_cast<Action*>(entry);
		if (action != NULL) {
			out.Clear();
			out.DumpEntry(action);
			continue;
		}
 
		BlockData* blockData = dynamic_cast<BlockData*>(entry);
		if (blockData == NULL)
			continue;
 
		out.Clear();
 
		const char* dump = out.DumpEntry(entry);
		int length = strlen(dump);
		if (length > 0 && dump[length - 1] == '\n')
			length--;
 
		kprintf("%5" B_PRId32 ". %.*s\n", index, length, dump);
 
		if (printStackTrace) {
			out.Clear();
			entry->DumpStackTrace(out);
			if (out.Size() > 0)
				kputs(out.Buffer());
		}
 
		blockData->DumpBlocks(which, offset, size);
	}
 
	return 0;
}
#endif	// BLOCK_CACHE_BLOCK_TRACING >= 2
 
 
#endif	// DEBUG_BLOCK_CACHE
 
 
/*!	Traverses through the block_cache list, and returns one cache after the
	other. The cache returned is automatically locked when you get it, and
	unlocked with the next call to this function. Ignores caches that are in
	deletion state.
	Returns \c NULL when the end of the list is reached.
*/
static block_cache*
get_next_locked_block_cache(block_cache* last)
{
	MutexLocker _(sCachesLock);
 
	block_cache* cache;
	if (last != NULL) {
		mutex_unlock(&last->lock);
 
		cache = sCaches.GetNext((block_cache*)&sMarkCache);
		sCaches.Remove((block_cache*)&sMarkCache);
	} else
		cache = sCaches.Head();
 
	if (cache != NULL) {
		mutex_lock(&cache->lock);
		sCaches.Insert(sCaches.GetNext(cache), (block_cache*)&sMarkCache);
	}
 
	return cache;
}
 
 
/*!	Background thread that continuously checks for pending notifications of
	all caches.
	Every two seconds, it will also write back up to 64 blocks per cache.
*/
static status_t
block_notifier_and_writer(void* /*data*/)
{
	const bigtime_t kTimeout = 2000000LL;
	bigtime_t timeout = kTimeout;
 
	while (true) {
		bigtime_t start = system_time();
 
		status_t status = acquire_sem_etc(sEventSemaphore, 1,
			B_RELATIVE_TIMEOUT, timeout);
		if (status == B_OK) {
			flush_pending_notifications();
			timeout -= system_time() - start;
			continue;
		}
 
		// write 64 blocks of each block_cache every two seconds
		// TODO: change this once we have an I/O scheduler
		timeout = kTimeout;
		size_t usedMemory;
		object_cache_get_usage(sBlockCache, &usedMemory);
 
		block_cache* cache = NULL;
		while ((cache = get_next_locked_block_cache(cache)) != NULL) {
			BlockWriter writer(cache, 64);
 
			size_t cacheUsedMemory;
			object_cache_get_usage(cache->buffer_cache, &cacheUsedMemory);
			usedMemory += cacheUsedMemory;
 
			if (cache->num_dirty_blocks) {
				// This cache is not using transactions, we'll scan the blocks
				// directly
				BlockTable::Iterator iterator(cache->hash);
 
				while (iterator.HasNext()) {
					cached_block* block = iterator.Next();
					if (block->CanBeWritten() && !writer.Add(block))
						break;
				}
 
			} else {
				TransactionTable::Iterator iterator(cache->transaction_hash);
 
				while (iterator.HasNext()) {
					cache_transaction* transaction = iterator.Next();
					if (transaction->open) {
						if (system_time() > transaction->last_used
								+ kTransactionIdleTime) {
							// Transaction is open but idle
							notify_transaction_listeners(cache, transaction,
								TRANSACTION_IDLE);
						}
						continue;
					}
 
					bool hasLeftOvers;
						// we ignore this one
					if (!writer.Add(transaction, hasLeftOvers))
						break;
				}
			}
 
			writer.Write();
 
			if ((block_cache_used_memory() / B_PAGE_SIZE)
					> vm_page_num_pages() / 2) {
				// Try to reduce memory usage to half of the available
				// RAM at maximum
				cache->RemoveUnusedBlocks(1000, 10);
			}
		}
 
		MutexLocker _(sCachesMemoryUseLock);
		sUsedMemory = usedMemory;
	}
 
	// never can get here
	return B_OK;
}
 
 
/*!	Notify function for wait_for_notifications(). */
static void
notify_sync(int32 transactionID, int32 event, void* _cache)
{
	block_cache* cache = (block_cache*)_cache;
 
	cache->condition_variable.NotifyOne();
}
 
 
/*!	Must be called with the sCachesLock held. */
static bool
is_valid_cache(block_cache* cache)
{
	ASSERT_LOCKED_MUTEX(&sCachesLock);
 
	DoublyLinkedList<block_cache>::Iterator iterator = sCaches.GetIterator();
	while (iterator.HasNext()) {
		if (cache == iterator.Next())
			return true;
	}
 
	return false;
}
 
 
/*!	Waits until all pending notifications are carried out.
	Safe to be called from the block writer/notifier thread.
	You must not hold the \a cache lock when calling this function.
*/
static void
wait_for_notifications(block_cache* cache)
{
	MutexLocker locker(sCachesLock);
 
	if (find_thread(NULL) == sNotifierWriterThread) {
		// We're the notifier thread, don't wait, but flush all pending
		// notifications directly.
		if (is_valid_cache(cache))
			flush_pending_notifications(cache);
		return;
	}
 
	// add sync notification
	cache_notification notification;
	set_notification(NULL, notification, TRANSACTION_WRITTEN, notify_sync,
		cache);
 
	ConditionVariableEntry entry;
	cache->condition_variable.Add(&entry);
 
	add_notification(cache, &notification, TRANSACTION_WRITTEN, false);
	locker.Unlock();
 
	// wait for notification hook to be called
	entry.Wait();
}
 
 
status_t
block_cache_init(void)
{
	sBlockCache = create_object_cache_etc("cached blocks", sizeof(cached_block),
		8, 0, 0, 0, CACHE_LARGE_SLAB, NULL, NULL, NULL, NULL);
	if (sBlockCache == NULL)
		return B_NO_MEMORY;
 
	new (&sCaches) DoublyLinkedList<block_cache>;
		// manually call constructor
 
	sEventSemaphore = create_sem(0, "block cache event");
	if (sEventSemaphore < B_OK)
		return sEventSemaphore;
 
	sNotifierWriterThread = spawn_kernel_thread(&block_notifier_and_writer,
		"block notifier/writer", B_LOW_PRIORITY, NULL);
	if (sNotifierWriterThread >= B_OK)
		resume_thread(sNotifierWriterThread);
 
#if DEBUG_BLOCK_CACHE
	add_debugger_command_etc("block_caches", &dump_caches,
		"dumps all block caches", "\n", 0);
	add_debugger_command_etc("block_cache", &dump_cache,
		"dumps a specific block cache",
		"[-bt] <cache-address> [block-number]\n"
		"  -t lists the transactions\n"
		"  -b lists all blocks\n", 0);
	add_debugger_command("cached_block", &dump_cached_block,
		"dumps the specified cached block");
	add_debugger_command_etc("transaction", &dump_transaction,
		"dumps a specific transaction", "[-b] ((<cache> <id>) | <transaction>)\n"
		"Either use a block cache pointer and an ID or a pointer to the transaction.\n"
		"  -b lists all blocks that are part of this transaction\n", 0);
#	if BLOCK_CACHE_BLOCK_TRACING >= 2
	add_debugger_command_etc("block_cache_data", &dump_block_data,
		"dumps the data blocks logged for the actions",
		"[-cpo] <from> [<to> [<offset> [<size>]]]\n"
		"If no data specifier is used, all blocks are shown by default.\n"
		" -c       the current data is shown, if available.\n"
		" -p       the parent data is shown, if available.\n"
		" -o       the original data is shown, if available.\n"
		" <from>   first index of tracing entries to show.\n"
		" <to>     if given, the last entry. If not, only <from> is shown.\n"
		" <offset> the offset of the block data.\n"
		" <from>   the size of the block data that is dumped\n", 0);
#	endif
#endif	// DEBUG_BLOCK_CACHE
 
	return B_OK;
}
 
 
size_t
block_cache_used_memory(void)
{
	MutexLocker _(sCachesMemoryUseLock);
	return sUsedMemory;
}
 
 
//	#pragma mark - public transaction API
 
 
int32
cache_start_transaction(void* _cache)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	if (cache->last_transaction && cache->last_transaction->open) {
		panic("last transaction (%" B_PRId32 ") still open!\n",
			cache->last_transaction->id);
	}
 
	cache_transaction* transaction = new(std::nothrow) cache_transaction;
	if (transaction == NULL)
		return B_NO_MEMORY;
 
	transaction->id = atomic_add(&cache->next_transaction_id, 1);
	cache->last_transaction = transaction;
 
	TRACE(("cache_start_transaction(): id %" B_PRId32 " started\n", transaction->id));
	T(Action("start", cache, transaction));
 
	cache->transaction_hash->Insert(transaction);
 
	return transaction->id;
}
 
 
status_t
cache_sync_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	bool hadBusy;
 
	TRACE(("cache_sync_transaction(id %" B_PRId32 ")\n", id));
 
	do {
		TransactionLocker locker(cache);
		hadBusy = false;
 
		BlockWriter writer(cache);
		TransactionTable::Iterator iterator(cache->transaction_hash);
 
		while (iterator.HasNext()) {
			// close all earlier transactions which haven't been closed yet
			cache_transaction* transaction = iterator.Next();
 
			if (transaction->busy_writing_count != 0) {
				hadBusy = true;
				continue;
			}
			if (transaction->id <= id && !transaction->open) {
				// write back all of their remaining dirty blocks
				T(Action("sync", cache, transaction));
 
				bool hasLeftOvers;
				writer.Add(transaction, hasLeftOvers);
 
				if (hasLeftOvers) {
					// This transaction contains blocks that a previous
					// transaction is trying to write back in this write run
					hadBusy = true;
				}
			}
		}
 
		status_t status = writer.Write();
		if (status != B_OK)
			return status;
	} while (hadBusy);
 
	wait_for_notifications(cache);
		// make sure that all pending TRANSACTION_WRITTEN notifications
		// are handled after we return
	return B_OK;
}
 
 
status_t
cache_end_transaction(void* _cache, int32 id,
	transaction_notification_hook hook, void* data)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	TRACE(("cache_end_transaction(id = %" B_PRId32 ")\n", id));
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL) {
		panic("cache_end_transaction(): invalid transaction ID\n");
		return B_BAD_VALUE;
	}
 
	// Write back all pending transaction blocks
	status_t status = write_blocks_in_previous_transaction(cache, transaction);
	if (status != B_OK)
		return status;
 
	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
 
	if (hook != NULL
		&& add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN,
			hook, data) != B_OK) {
		return B_NO_MEMORY;
	}
 
	T(Action("end", cache, transaction));
 
	// iterate through all blocks and free the unchanged original contents
 
	cached_block* next;
	for (cached_block* block = transaction->first_block; block != NULL;
			block = next) {
		next = block->transaction_next;
		ASSERT(block->previous_transaction == NULL);
 
		if (block->discard) {
			// This block has been discarded in the transaction
			cache->DiscardBlock(block);
			transaction->num_blocks--;
			continue;
		}
 
		if (block->original_data != NULL) {
			cache->Free(block->original_data);
			block->original_data = NULL;
		}
		if (block->parent_data != NULL) {
			ASSERT(transaction->has_sub_transaction);
			cache->FreeBlockParentData(block);
		}
 
		// move the block to the previous transaction list
		transaction->blocks.Add(block);
 
		block->previous_transaction = transaction;
		block->transaction_next = NULL;
		block->transaction = NULL;
	}
 
	transaction->open = false;
	return B_OK;
}
 
 
status_t
cache_abort_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	TRACE(("cache_abort_transaction(id = %" B_PRId32 ")\n", id));
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL) {
		panic("cache_abort_transaction(): invalid transaction ID\n");
		return B_BAD_VALUE;
	}
 
	T(Abort(cache, transaction));
	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
 
	// iterate through all blocks and restore their original contents
 
	cached_block* block = transaction->first_block;
	cached_block* next;
	for (; block != NULL; block = next) {
		next = block->transaction_next;
 
		if (block->original_data != NULL) {
			TRACE(("cache_abort_transaction(id = %" B_PRId32 "): restored contents of "
				"block %" B_PRIdOFF "\n", transaction->id, block->block_number));
			memcpy(block->current_data, block->original_data,
				cache->block_size);
			cache->Free(block->original_data);
			block->original_data = NULL;
		}
		if (transaction->has_sub_transaction && block->parent_data != NULL)
			cache->FreeBlockParentData(block);
 
		block->transaction_next = NULL;
		block->transaction = NULL;
		block->discard = false;
		if (block->previous_transaction == NULL)
			block->is_dirty = false;
	}
 
	cache->transaction_hash->Remove(transaction);
	delete_transaction(cache, transaction);
	return B_OK;
}
 
 
/*!	Acknowledges the current parent transaction, and starts a new transaction
	from its sub transaction.
	The new transaction also gets a new transaction ID.
*/
int32
cache_detach_sub_transaction(void* _cache, int32 id,
	transaction_notification_hook hook, void* data)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	TRACE(("cache_detach_sub_transaction(id = %" B_PRId32 ")\n", id));
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL) {
		panic("cache_detach_sub_transaction(): invalid transaction ID\n");
		return B_BAD_VALUE;
	}
	if (!transaction->has_sub_transaction)
		return B_BAD_VALUE;
 
	// iterate through all blocks and free the unchanged original contents
 
	status_t status = write_blocks_in_previous_transaction(cache, transaction);
	if (status != B_OK)
		return status;
 
	// create a new transaction for the sub transaction
	cache_transaction* newTransaction = new(std::nothrow) cache_transaction;
	if (newTransaction == NULL)
		return B_NO_MEMORY;
 
	newTransaction->id = atomic_add(&cache->next_transaction_id, 1);
	T(Detach(cache, transaction, newTransaction));
 
	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
 
	if (add_transaction_listener(cache, transaction, TRANSACTION_WRITTEN, hook,
			data) != B_OK) {
		delete newTransaction;
		return B_NO_MEMORY;
	}
 
	cached_block* last = NULL;
	cached_block* next;
	for (cached_block* block = transaction->first_block; block != NULL;
			block = next) {
		next = block->transaction_next;
		ASSERT(block->previous_transaction == NULL);
 
		if (block->discard) {
			cache->DiscardBlock(block);
			transaction->main_num_blocks--;
			continue;
		}
 
		if (block->parent_data != NULL) {
			// The block changed in the parent - free the original data, since
			// they will be replaced by what is in current.
			ASSERT(block->original_data != NULL);
			cache->Free(block->original_data);
 
			if (block->parent_data != block->current_data) {
				// The block had been changed in both transactions
				block->original_data = block->parent_data;
			} else {
				// The block has only been changed in the parent
				block->original_data = NULL;
			}
 
			// move the block to the previous transaction list
			transaction->blocks.Add(block);
			block->previous_transaction = transaction;
			block->parent_data = NULL;
		}
 
		if (block->original_data != NULL) {
			// This block had been changed in the current sub transaction,
			// we need to move this block over to the new transaction.
			ASSERT(block->parent_data == NULL);
 
			if (last == NULL)
				newTransaction->first_block = block;
			else
				last->transaction_next = block;
 
			block->transaction = newTransaction;
			last = block;
		} else
			block->transaction = NULL;
 
		block->transaction_next = NULL;
	}
 
	newTransaction->num_blocks = transaction->sub_num_blocks;
 
	transaction->open = false;
	transaction->has_sub_transaction = false;
	transaction->num_blocks = transaction->main_num_blocks;
	transaction->sub_num_blocks = 0;
 
	cache->transaction_hash->Insert(newTransaction);
	cache->last_transaction = newTransaction;
 
	return newTransaction->id;
}
 
 
status_t
cache_abort_sub_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 ")\n", id));
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL) {
		panic("cache_abort_sub_transaction(): invalid transaction ID\n");
		return B_BAD_VALUE;
	}
	if (!transaction->has_sub_transaction)
		return B_BAD_VALUE;
 
	T(Abort(cache, transaction));
	notify_transaction_listeners(cache, transaction, TRANSACTION_ABORTED);
 
	// revert all changes back to the version of the parent
 
	cached_block* block = transaction->first_block;
	cached_block* last = NULL;
	cached_block* next;
	for (; block != NULL; block = next) {
		next = block->transaction_next;
 
		if (block->parent_data == NULL) {
			// The parent transaction didn't change the block, but the sub
			// transaction did - we need to revert to the original data.
			// The block is no longer part of the transaction
			if (block->original_data != NULL) {
				// The block might not have original data if was empty
				memcpy(block->current_data, block->original_data,
					cache->block_size);
			}
 
			if (last != NULL)
				last->transaction_next = next;
			else
				transaction->first_block = next;
 
			block->transaction_next = NULL;
			block->transaction = NULL;
			transaction->num_blocks--;
 
			if (block->previous_transaction == NULL) {
				cache->Free(block->original_data);
				block->original_data = NULL;
				block->is_dirty = false;
 
				if (block->ref_count == 0) {
					// Move the block into the unused list if possible
					block->unused = true;
					cache->unused_blocks.Add(block);
					cache->unused_block_count++;
				}
			}
		} else {
			if (block->parent_data != block->current_data) {
				// The block has been changed and must be restored - the block
				// is still dirty and part of the transaction
				TRACE(("cache_abort_sub_transaction(id = %" B_PRId32 "): "
					"restored contents of block %" B_PRIdOFF "\n",
					transaction->id, block->block_number));
				memcpy(block->current_data, block->parent_data,
					cache->block_size);
				cache->Free(block->parent_data);
				// The block stays dirty
			}
			block->parent_data = NULL;
			last = block;
		}
 
		block->discard = false;
	}
 
	// all subsequent changes will go into the main transaction
	transaction->has_sub_transaction = false;
	transaction->sub_num_blocks = 0;
 
	return B_OK;
}
 
 
status_t
cache_start_sub_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	TRACE(("cache_start_sub_transaction(id = %" B_PRId32 ")\n", id));
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL) {
		panic("cache_start_sub_transaction(): invalid transaction ID %" B_PRId32 "\n",
			id);
		return B_BAD_VALUE;
	}
 
	notify_transaction_listeners(cache, transaction, TRANSACTION_ENDED);
 
	// move all changed blocks up to the parent
 
	cached_block* block = transaction->first_block;
	cached_block* next;
	for (; block != NULL; block = next) {
		next = block->transaction_next;
 
		if (block->parent_data != NULL) {
			// There already is an older sub transaction - we acknowledge
			// its changes and move its blocks up to the parent
			ASSERT(transaction->has_sub_transaction);
			cache->FreeBlockParentData(block);
		}
		if (block->discard) {
			// This block has been discarded in the parent transaction.
			// Just throw away any changes made in this transaction, so that
			// it can still be reverted to its original contents if needed
			ASSERT(block->previous_transaction == NULL);
			if (block->original_data != NULL) {
				memcpy(block->current_data, block->original_data,
					cache->block_size);
				block->original_data = NULL;
			}
			continue;
		}
 
		// we "allocate" the parent data lazily, that means, we don't copy
		// the data (and allocate memory for it) until we need to
		block->parent_data = block->current_data;
	}
 
	// all subsequent changes will go into the sub transaction
	transaction->has_sub_transaction = true;
	transaction->main_num_blocks = transaction->num_blocks;
	transaction->sub_num_blocks = 0;
	T(Action("start-sub", cache, transaction));
 
	return B_OK;
}
 
 
/*!	Adds a transaction listener that gets notified when the transaction
	is ended, aborted, written, or idle as specified by \a events.
	The listener gets automatically removed when the transaction ends.
*/
status_t
cache_add_transaction_listener(void* _cache, int32 id, int32 events,
	transaction_notification_hook hook, void* data)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL)
		return B_BAD_VALUE;
 
	return add_transaction_listener(cache, transaction, events, hook, data);
}
 
 
status_t
cache_remove_transaction_listener(void* _cache, int32 id,
	transaction_notification_hook hookFunction, void* data)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL)
		return B_BAD_VALUE;
 
	ListenerList::Iterator iterator = transaction->listeners.GetIterator();
	while (iterator.HasNext()) {
		cache_listener* listener = iterator.Next();
		if (listener->data == data && listener->hook == hookFunction) {
			iterator.Remove();
 
			if (listener->events_pending != 0) {
				MutexLocker _(sNotificationsLock);
				if (listener->events_pending != 0)
					cache->pending_notifications.Remove(listener);
			}
			delete listener;
			return B_OK;
		}
	}
 
	return B_ENTRY_NOT_FOUND;
}
 
 
status_t
cache_next_block_in_transaction(void* _cache, int32 id, bool mainOnly,
	long* _cookie, off_t* _blockNumber, void** _data, void** _unchangedData)
{
	cached_block* block = (cached_block*)*_cookie;
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL || !transaction->open)
		return B_BAD_VALUE;
 
	if (block == NULL)
		block = transaction->first_block;
	else
		block = block->transaction_next;
 
	if (transaction->has_sub_transaction) {
		if (mainOnly) {
			// find next block that the parent changed
			while (block != NULL && block->parent_data == NULL)
				block = block->transaction_next;
		} else {
			// find next non-discarded block
			while (block != NULL && block->discard)
				block = block->transaction_next;
		}
	}
 
	if (block == NULL)
		return B_ENTRY_NOT_FOUND;
 
	if (_blockNumber)
		*_blockNumber = block->block_number;
	if (_data)
		*_data = mainOnly ? block->parent_data : block->current_data;
	if (_unchangedData)
		*_unchangedData = block->original_data;
 
	*_cookie = (addr_t)block;
	return B_OK;
}
 
 
int32
cache_blocks_in_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL)
		return B_BAD_VALUE;
 
	return transaction->num_blocks;
}
 
 
/*!	Returns the number of blocks that are part of the main transaction. If this
	transaction does not have a sub transaction yet, this is the same value as
	cache_blocks_in_transaction() would return.
*/
int32
cache_blocks_in_main_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL)
		return B_BAD_VALUE;
 
	if (transaction->has_sub_transaction)
		return transaction->main_num_blocks;
 
	return transaction->num_blocks;
}
 
 
int32
cache_blocks_in_sub_transaction(void* _cache, int32 id)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cache_transaction* transaction = lookup_transaction(cache, id);
	if (transaction == NULL)
		return B_BAD_VALUE;
 
	return transaction->sub_num_blocks;
}
 
 
/*!	Check if block is in transaction
*/
bool
cache_has_block_in_transaction(void* _cache, int32 id, off_t blockNumber)
{
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	cached_block* block = cache->hash->Lookup(blockNumber);
 
	return (block != NULL && block->transaction != NULL
		&& block->transaction->id == id);
}
 
 
//	#pragma mark - public block cache API
 
 
void
block_cache_delete(void* _cache, bool allowWrites)
{
	block_cache* cache = (block_cache*)_cache;
 
	if (allowWrites)
		block_cache_sync(cache);
 
	mutex_lock(&sCachesLock);
	sCaches.Remove(cache);
	mutex_unlock(&sCachesLock);
 
	mutex_lock(&cache->lock);
 
	// wait for all blocks to become unbusy
	wait_for_busy_reading_blocks(cache);
	wait_for_busy_writing_blocks(cache);
 
	// free all blocks
 
	cached_block* block = cache->hash->Clear(true);
	while (block != NULL) {
		cached_block* next = block->next;
		cache->FreeBlock(block);
		block = next;
	}
 
	// free all transactions (they will all be aborted)
 
	cache_transaction* transaction = cache->transaction_hash->Clear(true);
	while (transaction != NULL) {
		cache_transaction* next = transaction->next;
		delete transaction;
		transaction = next;
	}
 
	delete cache;
}
 
 
void*
block_cache_create(int fd, off_t numBlocks, size_t blockSize, bool readOnly)
{
	block_cache* cache = new(std::nothrow) block_cache(fd, numBlocks, blockSize,
		readOnly);
	if (cache == NULL)
		return NULL;
 
	if (cache->Init() != B_OK) {
		delete cache;
		return NULL;
	}
 
	MutexLocker _(sCachesLock);
	sCaches.Add(cache);
 
	return cache;
}
 
 
status_t
block_cache_sync(void* _cache)
{
	block_cache* cache = (block_cache*)_cache;
 
	// We will sync all dirty blocks to disk that have a completed
	// transaction or no transaction only
 
	MutexLocker locker(&cache->lock);
 
	BlockWriter writer(cache);
	BlockTable::Iterator iterator(cache->hash);
 
	while (iterator.HasNext()) {
		cached_block* block = iterator.Next();
		if (block->CanBeWritten())
			writer.Add(block);
	}
 
	status_t status = writer.Write();
 
	locker.Unlock();
 
	wait_for_notifications(cache);
		// make sure that all pending TRANSACTION_WRITTEN notifications
		// are handled after we return
	return status;
}
 
 
status_t
block_cache_sync_etc(void* _cache, off_t blockNumber, size_t numBlocks)
{
	block_cache* cache = (block_cache*)_cache;
 
	// We will sync all dirty blocks to disk that have a completed
	// transaction or no transaction only
 
	if (blockNumber < 0 || blockNumber >= cache->max_blocks) {
		panic("block_cache_sync_etc: invalid block number %" B_PRIdOFF
			" (max %" B_PRIdOFF ")",
			blockNumber, cache->max_blocks - 1);
		return B_BAD_VALUE;
	}
 
	MutexLocker locker(&cache->lock);
	BlockWriter writer(cache);
 
	for (; numBlocks > 0; numBlocks--, blockNumber++) {
		cached_block* block = cache->hash->Lookup(blockNumber);
		if (block == NULL)
			continue;
 
		if (block->CanBeWritten())
			writer.Add(block);
	}
 
	status_t status = writer.Write();
 
	locker.Unlock();
 
	wait_for_notifications(cache);
		// make sure that all pending TRANSACTION_WRITTEN notifications
		// are handled after we return
	return status;
}
 
 
/*!	Discards a block from the current transaction or from the cache.
	You have to call this function when you no longer use a block, ie. when it
	might be reclaimed by the file cache in order to make sure they won't
	interfere.
*/
void
block_cache_discard(void* _cache, off_t blockNumber, size_t numBlocks)
{
	// TODO: this could be a nice place to issue the ATA trim command
	block_cache* cache = (block_cache*)_cache;
	TransactionLocker locker(cache);
 
	BlockWriter writer(cache);
 
	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
		cached_block* block = cache->hash->Lookup(blockNumber);
		if (block != NULL && block->previous_transaction != NULL)
			writer.Add(block);
	}
 
	writer.Write();
		// TODO: this can fail, too!
 
	blockNumber -= numBlocks;
		// reset blockNumber to its original value
 
	for (size_t i = 0; i < numBlocks; i++, blockNumber++) {
		cached_block* block = cache->hash->Lookup(blockNumber);
		if (block == NULL)
			continue;
 
		ASSERT(block->previous_transaction == NULL);
 
		if (block->unused) {
			cache->unused_blocks.Remove(block);
			cache->unused_block_count--;
			cache->RemoveBlock(block);
		} else {
			if (block->transaction != NULL && block->parent_data != NULL
				&& block->parent_data != block->current_data) {
				panic("Discarded block %" B_PRIdOFF " has already been changed in this "
					"transaction!", blockNumber);
			}
 
			// mark it as discarded (in the current transaction only, if any)
			block->discard = true;
		}
	}
}
 
 
status_t
block_cache_make_writable(void* _cache, off_t blockNumber, int32 transaction)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
 
	if (cache->read_only) {
		panic("tried to make block writable on a read-only cache!");
		return B_ERROR;
	}
 
	// TODO: this can be done better!
	void* block = get_writable_cached_block(cache, blockNumber,
		blockNumber, 1, transaction, false);
	if (block != NULL) {
		put_cached_block((block_cache*)_cache, blockNumber);
		return B_OK;
	}
 
	return B_ERROR;
}
 
 
void*
block_cache_get_writable_etc(void* _cache, off_t blockNumber, off_t base,
	off_t length, int32 transaction)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
 
	TRACE(("block_cache_get_writable_etc(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
		blockNumber, transaction));
	if (cache->read_only)
		panic("tried to get writable block on a read-only cache!");
 
	return get_writable_cached_block(cache, blockNumber, base, length,
		transaction, false);
}
 
 
void*
block_cache_get_writable(void* _cache, off_t blockNumber, int32 transaction)
{
	return block_cache_get_writable_etc(_cache, blockNumber,
		blockNumber, 1, transaction);
}
 
 
void*
block_cache_get_empty(void* _cache, off_t blockNumber, int32 transaction)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
 
	TRACE(("block_cache_get_empty(block = %" B_PRIdOFF ", transaction = %" B_PRId32 ")\n",
		blockNumber, transaction));
	if (cache->read_only)
		panic("tried to get empty writable block on a read-only cache!");
 
	return get_writable_cached_block((block_cache*)_cache, blockNumber,
		blockNumber, 1, transaction, true);
}
 
 
const void*
block_cache_get_etc(void* _cache, off_t blockNumber, off_t base, off_t length)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
	bool allocated;
 
	cached_block* block = get_cached_block(cache, blockNumber, &allocated);
	if (block == NULL)
		return NULL;
 
#if BLOCK_CACHE_DEBUG_CHANGED
	if (block->compare == NULL)
		block->compare = cache->Allocate();
	if (block->compare != NULL)
		memcpy(block->compare, block->current_data, cache->block_size);
#endif
	TB(Get(cache, block));
 
	return block->current_data;
}
 
 
const void*
block_cache_get(void* _cache, off_t blockNumber)
{
	return block_cache_get_etc(_cache, blockNumber, blockNumber, 1);
}
 
 
/*!	Changes the internal status of a writable block to \a dirty. This can be
	helpful in case you realize you don't need to change that block anymore
	for whatever reason.
 
	Note, you must only use this function on blocks that were acquired
	writable!
*/
status_t
block_cache_set_dirty(void* _cache, off_t blockNumber, bool dirty,
	int32 transaction)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
 
	cached_block* block = cache->hash->Lookup(blockNumber);
	if (block == NULL)
		return B_BAD_VALUE;
	if (block->is_dirty == dirty) {
		// there is nothing to do for us
		return B_OK;
	}
 
	// TODO: not yet implemented
	if (dirty)
		panic("block_cache_set_dirty(): not yet implemented that way!\n");
 
	return B_OK;
}
 
 
void
block_cache_put(void* _cache, off_t blockNumber)
{
	block_cache* cache = (block_cache*)_cache;
	MutexLocker locker(&cache->lock);
 
	put_cached_block(cache, blockNumber);
}
 

V730 Not all members of a class are initialized inside the constructor. Consider inspecting: next, id.

V730 Not all members of a class are initialized inside the constructor. Consider inspecting: lock, busy_reading_condition, busy_writing_condition, condition_variable.