/* $Id: glue-storage.c,v 1.30 2009-04-25 15:12:54 potyra Exp $ 
 *
 * Copyright (C) 2007-2009 FAUmachine Team <info@faumachine.org>.
 * This program is free software. You can redistribute it and/or modify it
 * under the terms of the GNU General Public License, either version 2 of
 * the License, or (at your option) any later version. See COPYING.
 */

/* Set to non-zero to compile in the fprintf(stderr, ...) call traces below. */
#define DEBUG_CONTROL_FLOW	0

/*
 * Size in bytes of one mmap()ed cache block. It is also the granularity of
 * the copy-on-write map: one bit in the *.map file per block of this size.
 * Must be a power of two (the code masks with STORAGE_BLOCKSIZE - 1).
 */
#define STORAGE_BLOCKSIZE	4096

#include "config.h"

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>

#include "glue-log.h"
#include "glue-main.h"
#include "glue-shm.h"
#include "glue-storage.h"
#include "simsetup.h"

/*
 * Non-zero if newly created media files should be created sparse.
 * Set by storage_handle_args() ("--sparse" or FAUM_SPARSE=1) and read by
 * storage_create_empty().
 */
static int storage_sparse = 0;


/*
 * Return a pointer to the memory-mapped block of file `s` that starts at
 * byte offset `pos`. `pos` must be a multiple of STORAGE_BLOCKSIZE.
 *
 * The blocks are kept in a small LRU cache of mmap()ed windows. On a hit
 * the slot is reused; on a miss the least recently used slot is unmapped
 * (if it held a mapping) and remapped at `pos`. In both cases the slot is
 * moved to the head of the LRU list.
 *
 * The mapping is MAP_SHARED, and writable only if the file was opened with
 * wflag set. The returned pointer is only valid until the slot is recycled
 * by a later miss on the same `s`.
 *
 * Mapping failures are fatal (assert).
 */
static uint8_t *
storage_simple_cache(struct storage_simple *s, unsigned long long pos)
{
	const unsigned int nslots = sizeof(s->cache) / sizeof(s->cache[0]);
	struct storage_simple_cache *act;
	unsigned int nr;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p 0x%llx\n", __FUNCTION__, (void *) s, pos);
#endif

	assert(! (pos & (STORAGE_BLOCKSIZE - 1)));
	assert(s->lru_first);
	assert(s->lru_last);

	/* Look for a slot that already maps this block. */
	act = (struct storage_simple_cache *) 0;
	for (nr = 0; nr < nslots; nr++) {
		if (s->cache[nr].pos == pos) {
			/* Found in cache. */
			act = &s->cache[nr];
			break;
		}
	}

	if (! act) {
		/* Not found in cache: recycle the least recently used slot. */
		int ret;
		uint8_t *aret;

		act = s->lru_last;
		if (act->pos != -1) {
			ret = munmap(act->buf, STORAGE_BLOCKSIZE);
			assert(0 <= ret);
		}
		aret = mmap((void *) 0, STORAGE_BLOCKSIZE,
				s->wflag
					? (PROT_READ | PROT_WRITE)
					: PROT_READ,
				MAP_SHARED,
				s->fd, pos);
		assert(aret != MAP_FAILED);
		act->buf = aret;
		act->pos = pos;
	}

	/* Remove from old LRU list. */
	if (act->lru_prev) {
		act->lru_prev->lru_next = act->lru_next;
	} else {
		s->lru_first = act->lru_next;
	}
	if (act->lru_next) {
		act->lru_next->lru_prev = act->lru_prev;
	} else {
		s->lru_last = act->lru_prev;
	}

	/*
	 * Add to head of LRU list. If the list is empty after the removal
	 * above (single-entry cache), `act` is also the new tail; without
	 * this check the old head pointer would be dereferenced as NULL.
	 */
	act->lru_prev = (struct storage_simple_cache *) 0;
	act->lru_next = s->lru_first;
	if (act->lru_next) {
		act->lru_next->lru_prev = act;
	} else {
		s->lru_last = act;
	}
	s->lru_first = act;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p (%d) %p\n", __FUNCTION__,
			(void *) act, (int) (act - &s->cache[0]),
			(void *) act->buf);
#endif

	return act->buf;
}

/*
 * Return the size in bytes of the opened file `s`, as recorded by
 * storage_simple_open(). The file must be open (fd >= 0).
 */
static unsigned long long
storage_simple_size(struct storage_simple *s)
{
	unsigned long long bytes;

	assert(0 <= s->fd);

	bytes = s->size;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: 0x%llx\n", __FUNCTION__, s->size);
#endif

	return bytes;
}

static int
storage_simple_open(struct storage_simple *s, const char *path, int wflag)
{
	unsigned int nr;
	signed long long lret;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %s %d\n", __FUNCTION__,
			s, path, wflag);
#endif

	assert(s->fd <= 0);

	/*
	 * Open file.
	 */
	s->fd = open(path, wflag ? O_RDWR : O_RDONLY);
	if (s->fd < 0) {
#if DEBUG_CONTROL_FLOW
		fprintf(stderr, "%s: bad open\n", __FUNCTION__);
#endif
		return -1;
	}
	s->wflag = wflag;

	/*
	 * Get size.
	 */
	s->size = lseek(s->fd, (off_t) 0, SEEK_END);
	lret = s->size;
	if (lret < 0) {
		(void) close(s->fd);
		s->fd = -1;
#if DEBUG_CONTROL_FLOW
		fprintf(stderr, "%s: bad lseek\n", __FUNCTION__);
#endif
		return -1;
	}

	/*
	 * Initialize cache.
	 */
	for (nr = 0; nr < sizeof(s->cache) / sizeof(s->cache[0]); nr++) {
		s->cache[nr].lru_prev = (nr == 0)
			? (struct storage_simple_cache *) 0
			: &s->cache[nr - 1];
		s->cache[nr].lru_next = (nr == sizeof(s->cache) / sizeof(s->cache[0]) - 1)
			? (struct storage_simple_cache *) 0
			: &s->cache[nr + 1];
		s->cache[nr].pos = -1;
	}

	s->lru_first = &s->cache[0];
	s->lru_last = &s->cache[sizeof(s->cache) / sizeof(s->cache[0]) - 1];

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: done (size=%lld)\n", __FUNCTION__, s->size);
#endif

	return 0;
}

static int
storage_simple_close(struct storage_simple *s)
{
	unsigned int nr;
	int ret;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p\n", __FUNCTION__, s);
#endif

	for (nr = 0; nr < sizeof(s->cache) / sizeof(s->cache[0]); nr++) {
		if (s->cache[nr].pos != -1) {
#if DEBUG_CONTROL_FLOW
			fprintf(stderr, "%s: %p unmapping %d\n", __FUNCTION__,
					s, nr);
#endif
			ret = munmap(s->cache[nr].buf, STORAGE_BLOCKSIZE);
			assert(0 <= ret);
			s->cache[nr].pos = -1;
		}
	}

	ret = close(s->fd);
	assert(0 <= ret);

	s->fd = -1;

	return 0;
}

static void
storage_simple_init(struct storage_simple *s)
{
	unsigned int nr;

	s->fd = -1;

	for (nr = 0; nr < sizeof(s->cache) / sizeof(s->cache[0]); nr++) {
		s->cache[nr].buf = (uint8_t *) 0;
		s->cache[nr].pos = -1;
	}
}

/*
 * Copy `buflen` bytes starting at media offset `pos` into `buf`.
 * The range must not cross a STORAGE_BLOCKSIZE boundary and must not
 * be empty.
 *
 * With copy-on-write enabled, the bit for this block in the *.map file
 * selects the source: set -> *.cow file, clear -> original media file.
 *
 * Returns `buflen`.
 */
static int
storage_read_block(
	struct storage *s,
	uint8_t *buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	struct storage_simple *from;
	const uint8_t *block;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	assert(0 < buflen);
	assert((pos & (STORAGE_BLOCKSIZE - 1)) + buflen <= STORAGE_BLOCKSIZE);

	/* Without copy-on-write everything lives in the media file. */
	from = &s->media;

	if (s->cow) {
		unsigned long addr;
		unsigned int bit;
		const uint8_t *map;

		/* Byte and bit inside the map file describing this block. */
		addr = (pos / STORAGE_BLOCKSIZE) / 8;
		bit  = (pos / STORAGE_BLOCKSIZE) % 8;

		map = storage_simple_cache(&s->media_map,
				addr & ~(STORAGE_BLOCKSIZE - 1));

		if ((map[addr & (STORAGE_BLOCKSIZE - 1)] >> bit) & 1) {
			/* Use modified data from media.cow-file. */
			from = &s->media_cow;
		}
	}

	block = storage_simple_cache(from, pos & ~(STORAGE_BLOCKSIZE - 1));
	memcpy(buf, block + (pos & (STORAGE_BLOCKSIZE - 1)), buflen);

	return buflen;
}

/*
 * Copy `buflen` bytes from `buf` to media offset `pos`.
 * The range must not cross a STORAGE_BLOCKSIZE boundary and must not
 * be empty.
 *
 * Without copy-on-write the data goes straight into the media file.
 * With copy-on-write the data goes into the *.cow file; the first time
 * a block is touched its bit in the *.map file is set and, for a
 * partial-block write, the original block is copied over first.
 *
 * Returns `buflen`.
 */
static int
storage_write_block(
	struct storage *s,
	const uint8_t *buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	unsigned long addr;
	unsigned int bit;
	uint8_t mask;
	uint8_t *flags;
	uint8_t *cache;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	assert(0 < buflen);
	assert((pos & (STORAGE_BLOCKSIZE - 1)) + buflen <= STORAGE_BLOCKSIZE);

	if (! s->cow) {
		/* Plain media: write in place. */
		cache = storage_simple_cache(&s->media,
				pos & ~(STORAGE_BLOCKSIZE - 1));

		memcpy(cache + (pos & (STORAGE_BLOCKSIZE - 1)), buf, buflen);

		return buflen;
	}

	/* Byte and bit inside the map file describing this block. */
	addr = (pos / STORAGE_BLOCKSIZE) / 8;
	bit  = (pos / STORAGE_BLOCKSIZE) % 8;
	mask = 1 << bit;

	cache = storage_simple_cache(&s->media_map,
			addr & ~(STORAGE_BLOCKSIZE - 1));
	flags = &cache[addr & (STORAGE_BLOCKSIZE - 1)];

	if ((*flags & mask) == 0) {
		/* First write to this block: mark it as redirected. */
		*flags |= mask;

		if (buflen < STORAGE_BLOCKSIZE) {
			/* Copy original data to media.cow file. */
			const uint8_t *src;
			uint8_t *dst;

			src = storage_simple_cache(&s->media,
					pos & ~(STORAGE_BLOCKSIZE - 1));
			dst = storage_simple_cache(&s->media_cow,
					pos & ~(STORAGE_BLOCKSIZE - 1));
			memcpy(dst, src, STORAGE_BLOCKSIZE);
		}
	}

	cache = storage_simple_cache(&s->media_cow,
			pos & ~(STORAGE_BLOCKSIZE - 1));

	memcpy(cache + (pos & (STORAGE_BLOCKSIZE - 1)), buf, buflen);

	return buflen;
}

/*
 * Read `buflen` bytes starting at media offset `pos` into `_buf`.
 * The request is split at STORAGE_BLOCKSIZE boundaries and each piece
 * is handed to storage_read_block().
 *
 * Returns the number of bytes read (the sum of all pieces).
 */
int
storage_read(
	struct storage *s,
	void *_buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	uint8_t *dst = (uint8_t *) _buf;
	unsigned int done = 0;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	while (buflen != 0) {
		/* Bytes left between pos and the end of its block. */
		unsigned int room = STORAGE_BLOCKSIZE
				- (unsigned int) (pos & (STORAGE_BLOCKSIZE - 1));
		unsigned int chunk = (room < buflen) ? room : buflen;

		storage_read_block(s, dst, chunk, pos);

		done += chunk;
		buflen -= chunk;
		dst += chunk;
		pos += chunk;
	}

	return done;
}

/*
 * Write `buflen` bytes from `_buf` to media offset `pos`.
 * The request is split at STORAGE_BLOCKSIZE boundaries and each piece
 * is handed to storage_write_block().
 *
 * Returns the number of bytes written (the sum of all pieces).
 */
int
storage_write(
	struct storage *s,
	const void *_buf,
	unsigned int buflen,
	unsigned long long pos
)
{
	const uint8_t *src = (const uint8_t *) _buf;
	unsigned int done = 0;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d 0x%llx\n", __FUNCTION__,
			s, buflen, pos);
#endif

	while (buflen != 0) {
		/* Bytes left between pos and the end of its block. */
		unsigned int room = STORAGE_BLOCKSIZE
				- (unsigned int) (pos & (STORAGE_BLOCKSIZE - 1));
		unsigned int chunk = (room < buflen) ? room : buflen;

		storage_write_block(s, src, chunk, pos);

		done += chunk;
		buflen -= chunk;
		src += chunk;
		pos += chunk;
	}

	return done;
}

/*
 * Dispatch a transfer of `count` bytes at media offset `pos` to
 * storage_read() (command == IO_READ) or storage_write()
 * (command == IO_WRITE), using `buffer` as destination or source.
 *
 * Returns the callee's byte count. Any other command is a programming
 * error: it trips an assertion, and returns -1 when assertions are
 * compiled out (previously control fell off the end of the function).
 *
 * NOTE(review): `count` is narrowed to unsigned int by the callees'
 * prototypes; confirm no caller passes more than UINT_MAX bytes.
 */
int
storage_read_write(
	int command,
	struct storage *media,
	unsigned char *buffer,
	unsigned long long pos,
	unsigned long count
)
{
	switch (command) {
	case IO_READ:
		return storage_read(media, buffer, count, pos);
	case IO_WRITE:
		return storage_write(media, buffer, count, pos);
	default:
		assert(0);
		return -1;
	}
}

/*
 * Return the size in bytes of the (opened) media file of `s`.
 */
unsigned long long
storage_size(struct storage *s)
{
	struct storage_simple *base = &s->media;

	return storage_simple_size(base);
}

/*
 * Open the files backing `s`.
 *
 * Without copy-on-write only s->path is opened, writable if `wflag` is
 * non-zero. With copy-on-write the media file itself is always opened
 * read-only, and "<path>.cow" and "<path>.map" are opened with `wflag`.
 *
 * Returns 0 on success and -1 on failure; on failure every file opened
 * so far has been closed again. If a derived file name does not fit
 * into the local buffer, errno is set to ENAMETOOLONG and nothing is
 * opened (previously the name was formatted with an unbounded sprintf).
 */
int
storage_open(struct storage *s, int wflag)
{
	char cow_path[1024];
	char map_path[1024];
	int len;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %d\n", __FUNCTION__, s, wflag);
#endif

	if (! s->cow) {
		return storage_simple_open(&s->media, s->path, wflag);
	}

	/* Build both derived names first so no cleanup is needed on error. */
	len = snprintf(cow_path, sizeof(cow_path), "%s.cow", s->path);
	if (len < 0 || sizeof(cow_path) <= (size_t) len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	len = snprintf(map_path, sizeof(map_path), "%s.map", s->path);
	if (len < 0 || sizeof(map_path) <= (size_t) len) {
		errno = ENAMETOOLONG;
		return -1;
	}

	/* The original image is never written in copy-on-write mode. */
	if (storage_simple_open(&s->media, s->path, 0) < 0) {
		return -1;
	}
	if (storage_simple_open(&s->media_cow, cow_path, wflag) < 0) {
		storage_simple_close(&s->media);
		return -1;
	}
	if (storage_simple_open(&s->media_map, map_path, wflag) < 0) {
		storage_simple_close(&s->media_cow);
		storage_simple_close(&s->media);
		return -1;
	}

	return 0;
}

/*
 * Close the files backing `s`: just the media file without
 * copy-on-write; otherwise the map, cow and media files, in that order,
 * stopping at the first close that reports an error.
 *
 * Returns 0 on success, -1 if a close failed.
 */
int
storage_close(struct storage *s)
{
#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p\n", __FUNCTION__, s);
#endif

	if (! s->cow) {
		return storage_simple_close(&s->media);
	}

	if (storage_simple_close(&s->media_map) < 0) {
		return -1;
	}
	if (storage_simple_close(&s->media_cow) < 0) {
		return -1;
	}
	if (storage_simple_close(&s->media) < 0) {
		return -1;
	}

	return 0;
}

/*
 * Replace the media path stored in `s` with `path`.
 * Only the stored name changes; no file is opened or closed here.
 *
 * NOTE(review): the copy is unbounded; the size of s->path is not
 * visible from this file, so callers must guarantee that `path` fits.
 */
void
storage_change(struct storage *s, unsigned char *path)
{
	const char *new_path = (const char *) path;

#if DEBUG_CONTROL_FLOW
	fprintf(stderr, "%s: %p %s\n", __FUNCTION__, s, path);
#endif

	strcpy(s->path, new_path);
}

/*
 * Put the file slots of `s` into the closed state. The cow and map
 * slots are only initialized when copy-on-write is enabled, so s->cow
 * must already be set when this is called.
 */
void
storage_init(struct storage *s)
{
	storage_simple_init(&s->media);

	if (! s->cow) {
		return;
	}

	storage_simple_init(&s->media_cow);
	storage_simple_init(&s->media_map);
}

/*
 * Read up to `buflen` bytes from `fd` into `buf`, repeating short reads
 * until the buffer is full or end-of-file is reached.
 *
 * A read() interrupted by a signal (EINTR) is retried instead of being
 * reported as an error.
 *
 * Returns the number of bytes read (less than `buflen` only at
 * end-of-file), or a negative value on error with errno set by read().
 */
static int
xread(int fd, unsigned char *buf, unsigned int buflen)
{
	unsigned int count;
	ssize_t ret;

	count = 0;
	while (0 < buflen) {
		ret = read(fd, buf, buflen);
		if (ret < 0) {
			if (errno == EINTR) {
				/* Interrupted before any data arrived. */
				continue;
			}
			return -1;
		}
		if (ret == 0) {
			/* End of file. */
			break;
		}
		buf += ret;
		buflen -= ret;
		count += ret;
	}
	return count;
}

/*
 * Write `buflen` bytes from `buf` to `fd`, repeating short writes until
 * everything has been written.
 *
 * A write() interrupted by a signal (EINTR) is retried instead of being
 * reported as an error.
 *
 * Returns the number of bytes written (less than `buflen` only if
 * write() reported 0 bytes), or a negative value on error with errno
 * set by write().
 */
static int
xwrite(int fd, const unsigned char *buf, unsigned int buflen)
{
	unsigned int count;
	ssize_t ret;

	count = 0;
	while (0 < buflen) {
		ret = write(fd, buf, buflen);
		if (ret < 0) {
			if (errno == EINTR) {
				/* Interrupted before any data was written. */
				continue;
			}
			return -1;
		}
		if (ret == 0) {
			break;
		}
		buf += ret;
		buflen -= ret;
		count += ret;
	}
	return count;
}

/*
 * Create (or open, if it already exists) the file `name` and make it
 * `size` bytes long.
 *
 * - Sparse mode (storage_sparse set): seek to the last byte and write a
 *   single zero byte, leaving a hole in front of it.
 * - Otherwise: preallocate with posix_fallocate() where available; if
 *   that is unavailable or fails, fill the file by writing zeros.
 *
 * The file is fsync()ed and closed before returning. All failures are
 * fatal (assert). Always returns 0.
 */
static int
storage_create_empty(const char *name, unsigned long long size)
{
	int fd;
	int ret;

	fd = open(name, O_WRONLY | O_CREAT, 0666);
	assert(0 <= fd);

	if (storage_sparse) {
		if (0 < size) {
			/* Must be a defined value: it ends up in the file. */
			const uint8_t byte = 0;
			off_t off;

			off = lseek(fd, size - sizeof(byte), SEEK_SET);
			assert((unsigned long long) off == size - sizeof(byte));

			ret = xwrite(fd, &byte, sizeof(byte));
			assert(ret == sizeof(byte));
		}

	} else {
#if defined(HAVE_POSIX_FALLOCATE)
		/*
		 * posix_fallocate() returns 0 on success and an error
		 * number otherwise. On failure fall through to the
		 * zero-fill loop instead of silently leaving a short file.
		 */
		if (0 < size
		 && posix_fallocate(fd, 0, size) == 0) {
			size = 0;
		}
#endif
		while (0 < size) {
			static const uint8_t buffer[1024*1024];
			unsigned int count;

			if (size < sizeof(buffer)) {
				count = size;
			} else {
				count = sizeof(buffer);
			}

			ret = xwrite(fd, buffer, count);
			assert(0 < ret);
			assert((unsigned int) ret <= count);

			size -= ret;
		}
	}

	ret = fsync(fd);
	assert(0 <= ret);

	ret = close(fd);
	assert(0 <= ret);

	return 0;
}

/*
 * Copy the contents of file `src` to file `dst`.
 *
 * `dst` is created if necessary and truncated first, so that no stale
 * data remains behind the copied bytes when `dst` already existed and
 * was longer than `src`. The copy is fsync()ed before both files are
 * closed.
 *
 * All failures are fatal (assert). Always returns 0.
 */
static int
storage_copy_file(const char *dst, const char *src)
{
	/* Kept moderate: the buffer lives on the stack. */
	uint8_t buffer[64*1024];
	int dst_fd;
	int src_fd;
	int len;
	int ret;

	dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0666);
	assert(0 <= dst_fd);

	src_fd = open(src, O_RDONLY);
	assert(0 <= src_fd);

	for (;;) {
		len = xread(src_fd, buffer, sizeof(buffer));
		if (len == 0) {
			/* End of source file. */
			break;
		}
		assert(0 < len);
		ret = xwrite(dst_fd, buffer, len);
		assert(ret == len);
	}
	ret = fsync(dst_fd);
	assert(0 <= ret);

	ret = close(src_fd);
	assert(0 <= ret);

	ret = close(dst_fd);
	assert(0 <= ret);

	return 0;
}

/*
 * Set up the on-disk files for `media` and record its path and
 * copy-on-write mode.
 *
 * media     storage object to fill in (path, cow)
 * name      file name relative to basedir; empty means "no media"
 *           (only valid with wflag == 0 and cow == 0)
 * wflag     non-zero if the media is to be writable
 * image     optional template image; NULL or "" means none
 * _size     size of a newly created empty image, in MiB
 * blocksize not used by this function
 * create    non-zero to recreate the files even if they already exist
 * cow       non-zero to use copy-on-write files (<path>.cow, <path>.map)
 * sync_out  not used by this function
 *
 * Writable media: if `create` is set or the file is not accessible, it
 * is (re)created, either empty, as a copy of `image`, or (with cow) as
 * a symlink to `image`; with cow, empty *.cow and *.map files sized
 * after the real media file are created as well.
 * Read-only media: a missing file is replaced by a symlink to `image`;
 * without an image a configuration error is logged.
 *
 * The file work is bracketed by time_stop()/time_cont().
 * File-system failures are fatal (assert).
 *
 * NOTE(review): media->path is still filled with sprintf() because its
 * capacity is not visible from this file; confirm it can hold
 * basedir + "/" + name.
 * NOTE(review): the read-only branch tests for R_OK | W_OK; confirm
 * that requiring write permission there is intended.
 */
void
storage_create(
	struct storage *media,
	const char *name,
	int wflag,
	const char *image,
	int _size,
	int blocksize,
	int create,
	int cow,
	int sync_out
)
{
	unsigned long long size = (unsigned long long) _size * 1024*1024;
	char path_cow[1024];
	char path_map[1024];
	int ret;

	if (name[0] == '\0') {
		sprintf(media->path, "%s", name);
	} else {
		sprintf(media->path, "%s/%s", basedir, name);
	}

	media->cow = cow;

	if (name[0] == '\0') {
		assert(! wflag);
		assert(! cow);
		return;
	}

	/* Bounded formatting: never write past the local buffers. */
	ret = snprintf(path_cow, sizeof(path_cow), "%s.cow", media->path);
	assert(0 <= ret && (size_t) ret < sizeof(path_cow));
	ret = snprintf(path_map, sizeof(path_map), "%s.map", media->path);
	assert(0 <= ret && (size_t) ret < sizeof(path_map));

	time_stop();

	if (wflag) {
		if (create
		 || access(media->path, R_OK | W_OK) < 0) {
			/* Start from scratch: remove whatever is there. */
			(void) unlink(media->path);
			if (cow) {
				(void) unlink(path_cow);
				(void) unlink(path_map);
			}

			if (! image
			 || ! *image) {
				/* Create empty file. */
				ret = storage_create_empty(media->path, size);
				assert(0 <= ret);

			} else {
				/* Copy from image. */
				if (cow) {
					/* Image stays untouched; link to it. */
					ret = symlink(image, media->path);
					assert(0 <= ret);
				} else {
					ret = storage_copy_file(media->path,
							image);
					assert(0 <= ret);
				}
			}
			if (cow) {
				off_t end;
				int fd;

				/* Get real size of media file. */
				/* size might be wrong in case of given image. */
				fd = open(media->path, O_RDONLY);
				assert(0 <= fd);
				end = lseek(fd, 0, SEEK_END);
				assert(0 <= end);
				size = end;
				ret = close(fd);
				assert(0 <= ret);

				/* Create *.cow file. */
				ret = storage_create_empty(path_cow, size);
				assert(0 <= ret);

				/* Create *.map file. */
				size /= STORAGE_BLOCKSIZE;	/* # bits */
				size += 7;			/* round up */
				size /= 8;			/* # bytes */
				ret = storage_create_empty(path_map, size);
				assert(0 <= ret);
			}
		} else {
			/* Use existing image */
		}
	} else {
		if (access(media->path, R_OK | W_OK) < 0) {
			if (! image
			 || ! *image) {
				/* Bad Configuration */
				faum_log(FAUM_LOG_ERROR, "storage", name,
						"Configuration error.");
			} else {
				ret = symlink(image, media->path);
				assert(0 <= ret);
			}
		} else {
			/* Use existing image */
		}
	}

	time_cont();
}

/*
 * Counterpart of storage_create(). Currently there is nothing to
 * release, so this is a no-op.
 */
void
storage_destroy(struct storage *media)
{
	(void) media;
}

/*
 * Print the help text for the command line options handled by
 * storage_handle_args() to stderr.
 */
void
storage_usage(void)
{
	static const char help[] =
		" \t--sparse: Use sparse files for media images.\n";

	fputs(help, stderr);
}

/*
 * Pick up the storage related settings from the environment and the
 * command line.
 *
 * Sparse media files are enabled if the environment variable
 * FAUM_SPARSE evaluates to 1 (via atoi) or if "--sparse" is present in
 * argv. Every "--sparse" entry is removed from argv and *argc is
 * decremented accordingly; the entry following the last argument
 * (argv[*argc]) is moved along with the remaining arguments.
 */
void
storage_handle_args(int *argc, char **argv)
{
	const char *env = getenv("FAUM_SPARSE");
	int total = *argc;
	int i = 0;

	if (env != NULL && atoi(env) == 1) {
		storage_sparse = 1;
	}

	while (i < total) {
		assert(argv != NULL);

		if (argv[i] == NULL || strcmp(argv[i], "--sparse") != 0) {
			/* Not ours: keep it and look at the next one. */
			i++;
			continue;
		}

		storage_sparse = 1;

		/* remove this arguments. */
		(*argc)--;
		total--;
		/* Shift the tail down, including the entry at argv[total + 1]. */
		memmove(&argv[i], &argv[i + 1],
				(total - i + 1) * sizeof(char *));
	}
}
