/*
 * Copyright 2007-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 */
 
 
#include "cdda.h"
 
#include <KernelExport.h>
#include <device/scsi.h>
 
#include <algorithm>
#include <ctype.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
 
 
/*!	Layout of a single raw CD-Text pack as it is walked by parse_pack_data().
	Every pack carries 12 bytes of text payload; the pack format itself is
	described in SCSI MMC-3.
*/
struct cdtext_pack_data {
	uint8	id;
		// pack type, compared against kTrackID/kArtistID/kMessageID
	uint8	track;
		// track the text belongs to; 0 is used for the album/disc entries
	uint8	number;
		// sequence number; consecutive packs must count up by one
	uint8	character_position : 4;
		// number of characters of the current string that were already
		// stored in the preceding pack(s)
	uint8	block_number : 3;
		// only printed by the disabled debug output in this file
	uint8	double_byte : 1;
		// set when the text uses two bytes per character (not supported yet)
	char	text[12];
		// text payload; may contain several null terminated strings
	uint8	crc[2];
		// NOTE(review): presumably a checksum of the pack; never evaluated
		// in this file
} _PACKED;
 
// Pack type IDs (cdtext_pack_data::id). Everything in the range
// kTrackID - kMessageID is treated as a text pack by is_string_id().
enum {
	kTrackID	= 0x80,
		// album title (track 0) and track titles
	kArtistID	= 0x81,
		// album artist (track 0) and track artists
	kMessageID	= 0x85,
		// last pack type that is handled as a string
};
 
// Size in bytes of the buffer read_cdtext() allocates for the CD-Text data
static const uint32 kBufferSize = 16384;
// Size in bytes of the sense data buffer allocated for raw SCSI commands
static const uint32 kSenseSize = 1024;
 
 
//	#pragma mark - string functions
 
 
/*!	Returns a newly allocated duplicate of \a string, or \c NULL if
	\a string is \c NULL or empty (or if the allocation failed).
	The caller owns the returned memory and has to free() it.
*/
static char *
copy_string(const char *string)
{
	if (string != NULL && string[0] != '\0')
		return strdup(string);

	return NULL;
}
 
 
static char *
to_utf8(const char* string)
{
	char buffer[256];
	size_t out = 0;
 
	// TODO: assume CP1252 or ISO-8859-1 character set for now
	while (uint32 c = (uint8)string[0]) {
 
		if (c < 0x80) {
			if (out >= sizeof(buffer) - 1)
				break;
			// ASCII character: no change needed
			buffer[out++] = c;
		} else {
			if (c < 0xA0) {
				// Windows CP-1252 - Use a lookup table
				static const uint32 lookup[] = {
					0x20AC, 0, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
					0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0, 0x017D, 0,
					0, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
					0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0, 0x017E, 0x0178
				};
 
				c = lookup[c - 0x80];
			}
 
			// Convert to 2 or 3-byte representation
			if (c == 0) {
				// invalid character, ignore
			} else if (c < 0x800) {
				if (out >= sizeof(buffer) - 2)
					break;
				buffer[out++] = 0xc0 | (c >> 6);
				buffer[out++] = 0x80 | (c & 0x3f);
			} else {
				if (out >= sizeof(buffer) - 3)
					break;
				buffer[out++] = 0xe0 | (c >> 12);
				buffer[out++] = 0x80 | ((c >> 6) & 0x3f);
				buffer[out++] = 0x80 | (c & 0x3f);
			}
		}
 
		string++;
	}
	buffer[out++] = '\0';
 
	char *copy = (char *)malloc(out);
	if (copy == NULL)
		return NULL;
 
	memcpy(copy, buffer, out);
	return copy;
}
 
 
/*!	Returns whether \a c is a character that should be stripped from the
	start of a string: white space, '-', '/', or '\'.
*/
static bool
is_garbage(char c)
{
	// The <ctype.h> functions require a value representable as unsigned char;
	// the strings handled here are UTF-8 and may contain bytes >= 0x80,
	// which are negative where char is signed.
	return isspace((unsigned char)c) || c == '-' || c == '/' || c == '\\';
}
 
 
/*!	Removes leading garbage (see is_garbage()) and trailing white space from
	\a string in place. If nothing is left afterwards, the string is freed,
	and \a string is set to \c NULL. A \c NULL \a string is left alone.
*/
static void
sanitize_string(char *&string)
{
	if (string == NULL)
		return;

	// strip garbage at the start

	uint32 length = strlen(string);
	uint32 garbage = 0;
	while (is_garbage(string[garbage])) {
		// the terminating null byte is no garbage, so this stops in time
		garbage++;
	}

	length -= garbage;
	if (garbage)
		memmove(string, string + garbage, length + 1);

	// strip white space from the end

	// The cast is needed as the string is UTF-8, and bytes >= 0x80 must not
	// be passed to isspace() as negative values.
	while (length > 1 && isspace((unsigned char)string[length - 1])) {
		string[--length] = '\0';
	}

	if (!string[0]) {
		// free string if it's empty
		free(string);
		string = NULL;
	}
}
 
 
/*!	Finds the first occurrence of \a find in \a string, ignores case.
	Returns a pointer into \a string, or \c NULL if there is no match or one
	of the arguments is \c NULL. An empty \a find matches at the start.
*/
static char*
find_string(const char *string, const char *find)
{
	if (string == NULL || find == NULL)
		return NULL;

	if (find[0] == '\0')
		return (char *)string;

	// tolower() must only be fed values representable as unsigned char, and
	// its result has to be kept as int - stored in a (signed) char, a byte
	// >= 0x80 would never compare equal again.
	const int first = tolower((unsigned char)find[0]);
	const char *rest = find + 1;
	const size_t restLength = strlen(rest);

	for (; string[0] != '\0'; string++) {
		if (tolower((unsigned char)string[0]) != first)
			continue;
		if (strncasecmp(string + 1, rest, restLength) == 0)
			return (char *)string;
	}

	return NULL;
}
 
 
static void
cut_string(char *string, const char *cut)
{
	if (string == NULL || cut == NULL)
		return;
 
	char *found = find_string(string, cut);
	if (found != NULL) {
		uint32 foundLength = strlen(found);
		uint32 cutLength = strlen(cut);
		memmove(found, found + cutLength, foundLength + 1 - cutLength);
	}
}
 
 
/*!	Cleans up the album name of \a text: the artist name is cut out of it,
	it is sanitized, and the placeholder name "My CD" is dropped.
	If there is no artist yet, and the album contains two consecutive spaces,
	the part in front of them becomes the artist, and the part after them the
	album.
*/
static void
sanitize_album(cdtext &text)
{
	cut_string(text.album, text.artist);
	sanitize_string(text.album);

	if (text.album != NULL && !strcasecmp(text.album, "My CD")) {
		// don't laugh, people really do that!
		free(text.album);
		text.album = NULL;
	}

	if ((text.artist == NULL || text.artist[0] == '\0') && text.album != NULL) {
		// try to extract artist from album
		char *space = strstr(text.album, "  ");
		if (space != NULL) {
			space[0] = '\0';

			// The artist may be an allocated, but empty string at this point;
			// release it before the pointer gets overwritten.
			free(text.artist);

			// The old album buffer is handed over to the artist; the album
			// has to be copied before the artist is sanitized, as "space"
			// points into that very buffer.
			text.artist = text.album;
			text.album = copy_string(space + 2);

			sanitize_string(text.artist);
			sanitize_string(text.album);
		}
	}
}
 
 
/*!	Cleans up the titles and artists of all tracks of \a text:
	- a title starting with a tab is replaced by a copy of the title of the
	  previous track,
	- "(Album Version)" is cut out of the titles,
	- titles and artists are sanitized,
	- a track artist that equals the album artist is removed.
*/
static void
sanitize_titles(cdtext &text)
{
	for (uint8 i = 0; i < text.track_count; i++) {
		// This has to be checked before the title is sanitized, as
		// sanitize_string() strips leading white space (including tabs),
		// and the marker would never be seen afterwards. The previous title
		// has already been cleaned up at this point.
		if (i > 0 && text.titles[i] != NULL && text.titles[i][0] == '\t') {
			free(text.titles[i]);
			text.titles[i] = copy_string(text.titles[i - 1]);
		}

		cut_string(text.titles[i], "(Album Version)");
		sanitize_string(text.titles[i]);
		sanitize_string(text.artists[i]);

		if (text.artists[i] != NULL && text.artist != NULL
			&& !strcasecmp(text.artists[i], text.artist)) {
			// if the title artist is the same as the main artist, remove it
			free(text.artists[i]);
			text.artists[i] = NULL;
		}
	}
}
 
 
/*!	Checks whether all letters of \a string share the same case.
	\a first and \a upper carry the state across several calls: as long as
	\a first is \c true, no letter has been seen yet; the first letter found
	determines \a upper, and clears \a first. Every further letter must then
	match \a upper.
	Returns \c false as soon as a letter with a different case is found,
	\c true otherwise (also for a \c NULL \a string).
*/
static bool
single_case(const char *string, bool &upper, bool &first)
{
	if (string == NULL)
		return true;

	for (; string[0] != '\0'; string++) {
		// <ctype.h> functions must not be passed negative values, which
		// bytes >= 0x80 of the UTF-8 string would be with a signed char
		const unsigned char c = (unsigned char)string[0];

		// Skip everything that is not a letter; this is done as part of the
		// main loop so that we never walk past the terminating null byte
		// when the string ends with a non-letter.
		if (!isalpha(c))
			continue;

		const bool isUpper = isupper(c) != 0;
		if (first) {
			upper = isUpper;
			first = false;
		} else if (isUpper != upper)
			return false;
	}

	return true;
}
 
 
/*!	Capitalizes \a string in place: the first letter of every word is turned
	into upper case, all other letters into lower case. Any character that is
	neither a letter nor an apostrophe starts a new word.
	Does nothing if \a string is \c NULL.
*/
static void
capitalize_string(char *string)
{
	if (string == NULL)
		return;

	// All <ctype.h> calls get an unsigned char value: the string is UTF-8,
	// and bytes >= 0x80 must not be passed as negative values.
	bool newWord = isalpha((unsigned char)string[0])
		|| isspace((unsigned char)string[0]);

	for (; string[0] != '\0'; string++) {
		const unsigned char c = (unsigned char)string[0];

		if (isalpha(c)) {
			if (newWord) {
				string[0] = toupper(c);
				newWord = false;
			} else
				string[0] = tolower(c);
		} else if (string[0] != '\'') {
			// an apostrophe does not end a word ("Don't", not "Don'T")
			newWord = true;
		}
	}
}
 
 
/*!	If album, artist, and all track titles/artists of \a text are written in
	one and the same case (all upper or all lower case), every one of those
	strings is capitalized word by word. Otherwise, nothing is changed.
*/
static void
correct_case(cdtext &text)
{
	// State shared by all single_case() calls: "upper" is only evaluated
	// after the first letter has been seen (ie. "first" has been cleared).
	bool first = true;
	bool upper = false;

	// check if all titles share a single case
	bool uniform = single_case(text.album, upper, first)
		&& single_case(text.artist, upper, first);

	for (int32 track = 0; uniform && track < text.track_count; track++) {
		uniform = single_case(text.titles[track], upper, first)
			&& single_case(text.artists[track], upper, first);
	}

	if (!uniform)
		return;

	// If we get here, everything has a single case; we fix that
	// and capitalize each word

	capitalize_string(text.album);
	capitalize_string(text.artist);

	for (int32 track = 0; track < text.track_count; track++) {
		capitalize_string(text.titles[track]);
		capitalize_string(text.artists[track]);
	}
}
 
 
//	#pragma mark - CD-Text
 
 
/*!	Creates an empty CD-Text object: no album, artist, or genre, no tracks,
	and all title/artist slots cleared.
*/
cdtext::cdtext()
{
	artist = NULL;
	album = NULL;
	genre = NULL;
	track_count = 0;

	// clear every slot of both arrays
	memset(artists, 0, sizeof(artists));
	memset(titles, 0, sizeof(titles));
}
 
 
/*!	Frees all strings owned by this object. */
cdtext::~cdtext()
{
	free(album);
	free(artist);
	free(genre);

	// Walk over all slots instead of just the first track_count ones:
	// read_cdtext() only raises track_count for title packs, so an artist
	// may have been stored in a slot beyond it. Unused slots are NULL (the
	// constructor clears both arrays), and free(NULL) is a no-op.
	const size_t titleSlots = sizeof(titles) / sizeof(titles[0]);
	for (size_t i = 0; i < titleSlots; i++)
		free(titles[i]);

	const size_t artistSlots = sizeof(artists) / sizeof(artists[0]);
	for (size_t i = 0; i < artistSlots; i++)
		free(artists[i]);
}
 
 
/*!	Returns whether packs with the type \a id contain text, ie. whether
	\a id is in the range kTrackID - kMessageID (inclusive).
*/
static bool
is_string_id(uint8 id)
{
	if (id < kTrackID)
		return false;

	return id <= kMessageID;
}
 
 
/*!	Parses a \a pack data into the provided text buffer; the corresponding
	track number will be left in \a track, and the type of the data in \a id.
	The pack data is explained in SCSI MMC-3.

	\a id, \a track, and \a state must stay constant between calls to this
	function. \a state must be initialized to zero for the first call.

	On entry, \a length is the size of \a buffer. \a pack, \a packLeft, and
	\a lastPack are advanced over all packs that have been consumed. The text
	left in \a buffer is always null terminated.

	Returns \c false if there is no complete pack left, or the pack sequence
	numbers are not consecutive; the contents of \a buffer should not be used
	in this case.
*/
static bool
parse_pack_data(cdtext_pack_data *&pack, uint32 &packLeft,
	cdtext_pack_data *&lastPack, uint8 &id, uint8 &track, uint8 &state,
	char *buffer, size_t &length)
{
	if (packLeft < sizeof(cdtext_pack_data))
		return false;

	const size_t kTextSize = sizeof(pack->text);

	uint8 number = pack->number;
	size_t size = length;
	if (size == 0) {
		// no room for even the terminating null byte
		return false;
	}

	if (state != 0) {
		// we had a terminated string and a missing track:
		// "state" is the offset of the next string within the last pack
		track++;

		// Never read beyond the text of the last pack ("state" may point
		// behind it), and never write beyond the buffer
		size_t available = 0;
		if (lastPack != NULL && state < kTextSize)
			available = kTextSize - state;
		if (available >= size)
			available = size - 1;

		if (available > 0)
			memcpy(buffer, lastPack->text + state, available);
		buffer[available] = '\0';

		if (pack->track - track == 1)
			state = 0;
		else {
			// Skip the string as well as its terminator, or else all
			// following tracks would end up with an empty string.
			// "state" is kept non-zero (clamped to the end of the text)
			// until the track number has caught up with the current pack.
			size_t terminator
				= (lastPack != NULL && lastPack->double_byte) ? 2 : 1;
			size_t next = state + strnlen(buffer, available) + terminator;
			state = (uint8)(next < kTextSize ? next : kTextSize);
		}
		return true;
	}

	id = pack->id;
	track = pack->track;

	buffer[0] = '\0';
	length = 0;

	// The beginning of this string may be found at the end of the last pack.
	// The position is a 4 bit value, and can therefore exceed the 12 bytes
	// a pack can hold - clamp it to prevent reading in front of the text.
	size_t position = pack->character_position;
	if (position > kTextSize)
		position = kTextSize;
	if (position >= size)
		position = size - 1;
	if (position > 0 && lastPack != NULL) {
		memcpy(buffer, &lastPack->text[kTextSize - position], position);
		length = position;
	}

	while (id == pack->id && track == pack->track) {
#if 0
		dprintf("%u.%u.%u, %u.%u.%u, ", pack->id, pack->track, pack->number,
			pack->double_byte, pack->block_number, pack->character_position);
		for (int32 i = 0; i < 12; i++) {
			if (isprint(pack->text[i]))
				dprintf("%c", pack->text[i]);
			else
				dprintf("-");
		}
		dprintf("\n");
#endif
		if (is_string_id(id)) {
			// TODO: support double byte characters
			if (length + kTextSize < size) {
				memcpy(buffer + length, pack->text, kTextSize);
				length += kTextSize;
			}
		}

		packLeft -= sizeof(cdtext_pack_data);
		if (packLeft < sizeof(cdtext_pack_data))
			return false;

		lastPack = pack;
		number++;
		pack++;

		if (pack->number != number)
			return false;
	}

	if (id == pack->id) {
		// The next pack belongs to another track of the same type; the
		// characters of its string that were part of the last pack are not
		// ours. This may wrap around with bogus data, but the length is
		// clamped to the buffer size below.
		length -= pack->character_position;

		if (lastPack != NULL && pack->track > lastPack->track + 1) {
			// there is a missing track: its string follows the first
			// terminated string in the last pack
			for (int32 i = 0; i < (int32)kTextSize; i++) {
				if (lastPack->text[i] == '\0') {
					state = i + (lastPack->double_byte ? 2 : 1);
					break;
				}
			}
		}
	}

	// Always terminate the text, also if the pack type changed
	if (length >= size)
		length = size - 1;
	buffer[length] = '\0';

	return true;
}
 
 
/*!	Prints album, artist, and all track titles (followed by the track artist
	in parentheses, if there is one) of \a text to the debug output.
*/
static void
dump_cdtext(cdtext &text)
{
	if (text.album != NULL)
		dprintf("Album:    \"%s\"\n", text.album);
	if (text.artist != NULL)
		dprintf("Artist:   \"%s\"\n", text.artist);

	for (uint8 index = 0; index < text.track_count; index++) {
		const char *trackArtist = text.artists[index];
		const bool hasArtist = trackArtist != NULL;

		dprintf("Track %02u: \"%s\"%s%s%s\n", index + 1, text.titles[index],
			hasArtist ? " (" : "", hasArtist ? trackArtist : "",
			hasArtist ? ")" : "");
	}
}
 
 
/*!	Prints number, start time, and length of every track of \a toc to the
	debug output. The length of a track is the distance to the start of the
	entry following it.
*/
static void
dump_toc(scsi_toc_toc *toc)
{
	const int32 trackCount = toc->last_track + 1 - toc->first_track;

	for (int32 index = 0; index < trackCount; index++) {
		scsi_toc_track& entry = toc->tracks[index];
		scsi_cd_msf& start = entry.start.time;
		scsi_cd_msf& next = toc->tracks[index + 1].start.time;
			// the last track is always lead-out

		// distance between both start times in frames
		uint64 diff = next.minute * kFramesPerMinute
			+ next.second * kFramesPerSecond + next.frame
			- start.minute * kFramesPerMinute
			- start.second * kFramesPerSecond - start.frame;

		// convert the frame count back to minutes/seconds/frames
		scsi_cd_msf duration;
		duration.frame = diff % kFramesPerSecond;
		duration.second = (diff % kFramesPerMinute) / kFramesPerSecond;
		duration.minute = diff / kFramesPerMinute;

		dprintf("%02u. %02u:%02u.%02u (length %02u:%02u.%02u)\n",
			entry.track_number, start.minute, start.second, start.frame,
			duration.minute, duration.second, duration.frame);
	}
}
 
 
/*!	Reads \a count frames starting at \a firstFrame from the device \a fd
	into \a buffer using the B_SCSI_READ_CD ioctl.
	If a read fails, it is retried with a smaller number of frames per
	request (8 if 32 or more frames were requested at once, 1 otherwise).
	Returns \c B_OK on success, or the value of \c errno if reading a single
	frame failed.
*/
static status_t
read_frames(int fd, off_t firstFrame, uint8 *buffer, size_t count)
{
	for (size_t remaining = count; remaining > 0;) {
		// If the initial count was >= 32, and not a multiple of 8, and the
		// ioctl fails, we switch to reading 8 frames at a time. However the
		// last read can read between 1 and 7 frames only, to not overflow
		// the buffer.
		if (count > remaining)
			count = remaining;

		scsi_read_cd request;
		request.buffer = (char *)buffer;
		request.buffer_length = count * kFrameSize;
		request.play = false;

		request.start_m = firstFrame / kFramesPerMinute;
		request.start_s = (firstFrame / kFramesPerSecond) % 60;
		request.start_f = firstFrame % kFramesPerSecond;

		request.length_m = count / kFramesPerMinute;
		request.length_s = (count / kFramesPerSecond) % 60;
		request.length_f = count % kFramesPerSecond;

		if (ioctl(fd, B_SCSI_READ_CD, &request) >= 0) {
			// this chunk is done, advance to the next one
			buffer += count * kFrameSize;
			remaining -= count;
			firstFrame += count;
			continue;
		}

		// drive couldn't read data - try again to read with a smaller block
		// size, unless we were down to a single frame already
		if (count == 1)
			return errno;

		count = count >= 32 ? 8 : 1;
	}

	return B_OK;
}
 
 
/*!	Sends a SCSI READ TOC command with the specified \a format and starting
	\a track to the device \a fd using the B_RAW_DEVICE_COMMAND ioctl.
	\a buffer is cleared before the command is issued, and receives up to
	\a bufferSize bytes of data.
	Returns \c B_OK if the command succeeded, \c B_NO_MEMORY if the sense
	buffer could not be allocated, and \c B_ERROR otherwise.
*/
static status_t
read_table_of_contents(int fd, uint32 track, uint8 format, uint8 *buffer,
	size_t bufferSize)
{
	raw_device_command raw;
	uint8 *senseData = (uint8 *)malloc(kSenseSize);
	if (senseData == NULL)
		return B_NO_MEMORY;

	memset(&raw, 0, sizeof(raw_device_command));
	memset(senseData, 0, kSenseSize);
	memset(buffer, 0, bufferSize);

	scsi_cmd_read_toc &toc = *(scsi_cmd_read_toc*)&raw.command;
	toc.opcode = SCSI_OP_READ_TOC;
	toc.time = 1;
	toc.format = format;
	toc.track = track;
	toc.allocation_length = B_HOST_TO_BENDIAN_INT16(bufferSize);

	raw.command_length = 10;
	raw.flags = B_RAW_DEVICE_DATA_IN | B_RAW_DEVICE_REPORT_RESIDUAL
		| B_RAW_DEVICE_SHORT_READ_VALID;
	raw.scsi_status = 0;
	raw.cam_status = 0;
	raw.data = buffer;
	raw.data_length = bufferSize;
	raw.timeout = 10000000LL;	// 10 secs
	raw.sense_data = senseData;
	raw.sense_data_length = kSenseSize;
		// the size of the buffer allocated above - sizeof(kSenseSize) would
		// only be the size of the constant itself

	status_t status = B_ERROR;
	if (ioctl(fd, B_RAW_DEVICE_COMMAND, &raw) == 0
		&& raw.scsi_status == 0 && raw.cam_status == 1)
		status = B_OK;

	free(senseData);
	return status;
}
 
 
//	#pragma mark - exported functions
 
 
/*!	Reads the CD-Text information from the device \a fd into \a cdtext, and
	cleans up the strings afterwards (sanitizing, case correction).
	Returns \c B_OK on success, \c B_NO_MEMORY if the read buffer could not
	be allocated, \c B_BAD_DATA if the reported data length is invalid, and
	\c B_ERROR if the data could not be read, there is neither an album nor
	an artist, or the title of a track is missing.
*/
status_t
read_cdtext(int fd, struct cdtext &cdtext)
{
	uint8 *buffer = (uint8 *)malloc(kBufferSize);
	if (buffer == NULL)
		return B_NO_MEMORY;

	// do it twice, just in case...
	// (at least my CD-ROM sometimes returned broken data on first try)
	read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
		kBufferSize);
	if (read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_CD_TEXT, buffer,
			kBufferSize) != B_OK) {
		free(buffer);
		return B_ERROR;
	}

	scsi_toc_general *header = (scsi_toc_general *)buffer;

	// The length is reported by the device, and must not be trusted: it
	// has to cover at least the rest of the header, and the packs must not
	// exceed what actually fits into our buffer behind the header.
	uint32 dataLength = B_BENDIAN_TO_HOST_INT16(header->data_length);
	if (dataLength < 2) {
		free(buffer);
		return B_BAD_DATA;
	}

	uint32 packLength = dataLength - 2;
	const uint32 maxPackLength = kBufferSize - sizeof(scsi_toc_general);
	if (packLength > maxPackLength)
		packLength = maxPackLength;

	cdtext_pack_data *pack = (cdtext_pack_data *)(header + 1);
	cdtext_pack_data *lastPack = NULL;
	uint8 state = 0;
	uint8 track = 0;
	uint8 id = 0;
	char text[256];

	// TODO: determine encoding!

	while (true) {
		// in: size of the text buffer
		size_t length = sizeof(text);

		if (!parse_pack_data(pack, packLength, lastPack, id, track,
				state, text, length))
			break;

		// Only the first string found for each entry is used
		switch (id) {
			case kTrackID:
				if (track == 0) {
					if (cdtext.album == NULL)
						cdtext.album = to_utf8(text);
				} else if (track <= kMaxTracks) {
					if (cdtext.titles[track - 1] == NULL)
						cdtext.titles[track - 1] = to_utf8(text);
					if (track > cdtext.track_count)
						cdtext.track_count = track;
				}
				break;

			case kArtistID:
				if (track == 0) {
					if (cdtext.artist == NULL)
						cdtext.artist = to_utf8(text);
				} else if (track <= kMaxTracks) {
					if (cdtext.artists[track - 1] == NULL)
						cdtext.artists[track - 1] = to_utf8(text);
				}
				break;

			default:
				if (is_string_id(id))
					dprintf("UNKNOWN %u: \"%s\"\n", id, text);
				break;
		}
	}

	free(buffer);

	if (cdtext.artist == NULL && cdtext.album == NULL)
		return B_ERROR;

	// every track needs to have a title
	for (int i = 0; i < cdtext.track_count; i++) {
		if (cdtext.titles[i] == NULL)
			return B_ERROR;
	}

	sanitize_string(cdtext.artist);
	sanitize_album(cdtext);
	sanitize_titles(cdtext);
	correct_case(cdtext);

	dump_cdtext(cdtext);
	return B_OK;
}
 
 
/*!	Reads the table of contents of the CD in the device \a fd into \a toc,
	which is a buffer of \a length bytes, and makes sure the track range fits
	the data that has actually been retrieved.
	Returns \c B_OK on success, \c B_BAD_VALUE if the buffer is missing or
	too small, \c B_BAD_DATA if the TOC makes no sense, or the error of the
	underlying read.
*/
status_t
read_table_of_contents(int fd, scsi_toc_toc *toc, size_t length)
{
	// The header fields are evaluated below, the buffer must contain them
	if (toc == NULL || length < sizeof(scsi_toc_toc))
		return B_BAD_VALUE;

	status_t status = read_table_of_contents(fd, 1, SCSI_TOC_FORMAT_TOC,
		(uint8*)toc, length);
	if (status < B_OK)
		return status;

	// make sure the values in the TOC make sense

	int32 trackCount = toc->last_track + 1 - toc->first_track;
	size_t dataLength = B_BENDIAN_TO_HOST_INT16(toc->data_length) + 2;
	if (dataLength < sizeof(scsi_toc_toc) || trackCount <= 0)
		return B_BAD_DATA;

	if (length > dataLength)
		length = dataLength;

	length -= sizeof(scsi_toc_general);

	// Number of complete track entries we got. The entry following the last
	// track (the lead-out) is needed to determine the end of that track, so
	// the tracks need one entry more than there are tracks.
	size_t entries = length / sizeof(scsi_toc_track);
	if (entries < 2)
		return B_BAD_DATA;

	if ((size_t)trackCount + 1 > entries) {
		// Truncated TOC: only keep the tracks that are followed by
		// another entry
		toc->last_track = toc->first_track + (entries - 2);
	}

	dump_toc(toc);
	return B_OK;
}
 
 
/*!	Copies \a length bytes of audio data, starting at the byte \a offset,
	into \a data (using user_memcpy()).
	\a buffer (of \a bufferSize bytes) is used as intermediate buffer for the
	frames read from the device \a fd. On entry, it is regarded as holding
	the data starting at byte \a bufferOffset, and whatever overlaps with the
	request is taken from there.
	NOTE(review): a negative \a bufferOffset presumably stands for a buffer
	without valid contents - confirm with the callers.
	No frames at or beyond \a endFrame are read.

	Returns \c B_OK on success, \c B_BAD_ADDRESS if copying to \a data
	failed, \c B_BAD_VALUE if the request exceeds \a endFrame or \a buffer
	cannot hold a single frame, or the error of the underlying read.
*/
status_t
read_cdda_data(int fd, off_t endFrame, off_t offset, void *data, size_t length,
	off_t bufferOffset, void *buffer, size_t bufferSize)
{
	if (bufferOffset >= 0 && bufferOffset <= offset + (off_t)length
		&& bufferOffset + (off_t)bufferSize > offset) {
		if (offset >= bufferOffset) {
			// buffer reaches into the beginning of the request
			off_t dataOffset = offset - bufferOffset;
			size_t bytes = min_c(bufferSize - dataOffset, length);
			if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
				return B_BAD_ADDRESS;

			data = (void *)((uint8 *)data + bytes);
			length -= bytes;
			offset += bytes;
		} else if (offset < bufferOffset
			&& offset + length < bufferOffset + bufferSize) {
			// buffer overlaps at the end of the request
			off_t dataOffset = bufferOffset - offset;
			size_t bytes = length - dataOffset;
			if (user_memcpy((uint8 *)data + dataOffset, buffer, bytes) < B_OK)
				return B_BAD_ADDRESS;

			// the remaining part at the start is read below
			length -= bytes;
		}
		// we don't handle the case where we would need to split the request
	}

	// Number of frames that fit into the buffer at once
	const uint32 framesPerRead = bufferSize / kFrameSize;
	if (length > 0 && framesPerRead == 0)
		return B_BAD_VALUE;

	while (length > 0) {
		off_t frame = offset / kFrameSize;
		if (frame >= endFrame) {
			// There is nothing left to read; without this check, the frame
			// count below would become zero or wrap around.
			return B_BAD_VALUE;
		}

		uint32 count = framesPerRead;
		if (frame + count > endFrame)
			count = endFrame - frame;

		status_t status = read_frames(fd, frame, (uint8 *)buffer, count);
		if (status < B_OK)
			return status;

		// Only hand out what has actually been read: this can be less than
		// the buffer size at the end of the data, or if the buffer size is
		// not a multiple of the frame size.
		off_t dataOffset = offset % kFrameSize;
		size_t bytes = (size_t)count * kFrameSize - dataOffset;
		if (bytes > length)
			bytes = length;

		if (user_memcpy(data, (uint8 *)buffer + dataOffset, bytes) < B_OK)
			return B_BAD_ADDRESS;

		data = (void *)((uint8 *)data + bytes);
		length -= bytes;
		offset += bytes;
	}

	return B_OK;
}

// NOTE: static analysis (PVS-Studio V595) reported for parse_pack_data(): the
// 'lastPack' pointer was utilized before it was verified against nullptr.
// Check lines: 369, 384.