patch-2.4.3 linux/drivers/block/loop.c
- Lines: 1168
- Date: Tue Mar 6 19:35:36 2001
- Orig file: v2.4.2/linux/drivers/block/loop.c
- Orig date: Wed Feb 21 18:20:18 2001
diff -u --recursive --new-file v2.4.2/linux/drivers/block/loop.c linux/drivers/block/loop.c
@@ -31,11 +31,14 @@
* max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
*
+ * Completely rewrite request handling to be make_request_fn style and
+ * non blocking, pushing work to a helper thread. Lots of fixes from
+ * Al Viro too.
+ * Jens Axboe <axboe@suse.de>, Nov 2000
+ *
* Still To Fix:
* - Advisory locking is ignored here.
* - Should use an own CAP_* category instead of CAP_SYS_ADMIN
- * - Should use the underlying filesystems/devices read function if possible
- * to support read ahead (and for write)
*
* WARNING/FIXME:
* - The block number as IV passing to low level transfer functions is broken:
@@ -48,6 +51,7 @@
* number.
*/
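
The changelog entry above refers to the 2.4 make_request_fn hook. As a rough orientation (not part of this patch), a driver using that hook has roughly the following shape; the mydrv_* names and the trivial completion are hypothetical, and the real implementation is loop_make_request() further down:

/* Sketch only; MAJOR_NR is assumed to be defined as in the surrounding driver. */
#include <linux/blk.h>
#include <linux/init.h>

static int mydrv_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
{
	/*
	 * Either remap bh->b_rdev/b_rsector and return non-zero so that
	 * generic_make_request() resubmits the buffer, or consume it here.
	 */
	bh->b_end_io(bh, 1);		/* complete it as uptodate */
	return 0;			/* 0 == consumed, do not resubmit */
}

static int __init mydrv_init(void)
{
	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), mydrv_make_request);
	return 0;
}
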
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -56,9 +60,13 @@
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/major.h>
-
+#include <linux/wait.h>
+#include <linux/blk.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
+#include <linux/smp_lock.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
@@ -66,40 +74,28 @@
#define MAJOR_NR LOOP_MAJOR
-#define DEVICE_NAME "loop"
-#define DEVICE_REQUEST do_lo_request
-#define DEVICE_NR(device) (MINOR(device))
-#define DEVICE_ON(device)
-#define DEVICE_OFF(device)
-#define DEVICE_NO_RANDOM
-#define TIMEOUT_VALUE (6 * HZ)
-#include <linux/blk.h>
-
-#include <linux/slab.h>
static int max_loop = 8;
static struct loop_device *loop_dev;
static int *loop_sizes;
static int *loop_blksizes;
static devfs_handle_t devfs_handle; /* For the directory */
-#define FALSE 0
-#define TRUE (!FALSE)
-
/*
* Transfer functions
*/
static int transfer_none(struct loop_device *lo, int cmd, char *raw_buf,
- char *loop_buf, int size, int real_block)
+ char *loop_buf, int size, int real_block)
{
if (cmd == READ)
memcpy(loop_buf, raw_buf, size);
else
memcpy(raw_buf, loop_buf, size);
+
return 0;
}
static int transfer_xor(struct loop_device *lo, int cmd, char *raw_buf,
- char *loop_buf, int size, int real_block)
+ char *loop_buf, int size, int real_block)
{
char *in, *out, *key;
int i, keysize;
@@ -111,17 +107,18 @@
in = loop_buf;
out = raw_buf;
}
+
key = lo->lo_encrypt_key;
keysize = lo->lo_encrypt_key_size;
- for (i=0; i < size; i++)
+ for (i = 0; i < size; i++)
*out++ = *in++ ^ key[(i & 511) % keysize];
return 0;
}
static int none_status(struct loop_device *lo, struct loop_info *info)
{
- return 0;
-}
+ return 0;
+}
static int xor_status(struct loop_device *lo, struct loop_info *info)
{
@@ -133,7 +130,7 @@
struct loop_func_table none_funcs = {
number: LO_CRYPT_NONE,
transfer: transfer_none,
- init: none_status
+ init: none_status,
};
struct loop_func_table xor_funcs = {
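
none_funcs and xor_funcs are instances of the loop_func_table interface that external transfer modules also use through the loop_register_transfer()/loop_unregister_transfer() exports near the end of this file. A minimal sketch of such a module (not part of this patch) might look as follows; LO_CRYPT_EXAMPLE, its value and the toy transform are invented for illustration and assume the number is an unused slot below MAX_LO_CRYPT:

#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/loop.h>

#define LO_CRYPT_EXAMPLE 18	/* hypothetical: assumes an unused slot below MAX_LO_CRYPT */

static int transfer_example(struct loop_device *lo, int cmd, char *raw_buf,
			    char *loop_buf, int size, int real_block)
{
	char *in  = (cmd == READ) ? raw_buf  : loop_buf;
	char *out = (cmd == READ) ? loop_buf : raw_buf;
	int i;

	for (i = 0; i < size; i++)
		out[i] = in[i] ^ 0xff;	/* toy transform, not real crypto */
	return 0;
}

static struct loop_func_table example_funcs = {
	number:   LO_CRYPT_EXAMPLE,
	transfer: transfer_example,
};

static int __init example_xfer_init(void)
{
	return loop_register_transfer(&example_funcs);
}

static void __exit example_xfer_exit(void)
{
	loop_unregister_transfer(LO_CRYPT_EXAMPLE);
}

module_init(example_xfer_init);
module_exit(example_xfer_exit);
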
@@ -150,39 +147,41 @@
#define MAX_DISK_SIZE 1024*1024*1024
-static void figure_loop_size(struct loop_device *lo)
+static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
{
- int size;
-
- if (S_ISREG(lo->lo_dentry->d_inode->i_mode))
- size = (lo->lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
- else {
- kdev_t lodev = lo->lo_device;
- if (blk_size[MAJOR(lodev)])
- size = blk_size[MAJOR(lodev)][MINOR(lodev)] -
+ if (S_ISREG(lo_dentry->d_inode->i_mode))
+ return (lo_dentry->d_inode->i_size - lo->lo_offset) >> BLOCK_SIZE_BITS;
+ if (blk_size[MAJOR(lodev)])
+ return blk_size[MAJOR(lodev)][MINOR(lodev)] -
(lo->lo_offset >> BLOCK_SIZE_BITS);
- else
- size = MAX_DISK_SIZE;
- }
+ return MAX_DISK_SIZE;
+}
- loop_sizes[lo->lo_number] = size;
+static void figure_loop_size(struct loop_device *lo)
+{
+ loop_sizes[lo->lo_number] = compute_loop_size(lo,
+ lo->lo_backing_file->f_dentry,
+ lo->lo_device);
}
-static int lo_send(struct loop_device *lo, char *data, int len, loff_t pos,
- int blksize)
+static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
+ loff_t pos)
{
struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
- struct address_space *mapping = lo->lo_dentry->d_inode->i_mapping;
+ struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct address_space_operations *aops = mapping->a_ops;
struct page *page;
- char *kaddr;
+ char *kaddr, *data;
unsigned long index;
unsigned size, offset;
+ int len;
index = pos >> PAGE_CACHE_SHIFT;
offset = pos & (PAGE_CACHE_SIZE - 1);
+ len = bh->b_size;
+ data = bh->b_data;
while (len > 0) {
- int IV = index * (PAGE_CACHE_SIZE/blksize) + offset/blksize;
+ int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
size = PAGE_CACHE_SIZE - offset;
if (size > len)
size = len;
@@ -193,7 +192,8 @@
if (aops->prepare_write(file, page, offset, offset+size))
goto unlock;
kaddr = page_address(page);
- if ((lo->transfer)(lo, WRITE, kaddr+offset, data, size, IV))
+ flush_dcache_page(page);
+ if (lo_do_transfer(lo, WRITE, kaddr + offset, data, size, IV))
goto write_fail;
if (aops->commit_write(file, page, offset, offset+size))
goto unlock;
@@ -203,6 +203,7 @@
index++;
pos += size;
UnlockPage(page);
+ deactivate_page(page);
page_cache_release(page);
}
return 0;
@@ -213,6 +214,7 @@
kunmap(page);
unlock:
UnlockPage(page);
+ deactivate_page(page);
page_cache_release(page);
fail:
return -1;
@@ -221,7 +223,7 @@
struct lo_read_data {
struct loop_device *lo;
char *data;
- int blksize;
+ int bsize;
};
static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -230,16 +232,15 @@
unsigned long count = desc->count;
struct lo_read_data *p = (struct lo_read_data*)desc->buf;
struct loop_device *lo = p->lo;
- int IV = page->index * (PAGE_CACHE_SIZE/p->blksize) + offset/p->blksize;
+ int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
if (size > count)
size = count;
kaddr = kmap(page);
- if ((lo->transfer)(lo,READ,kaddr+offset,p->data,size,IV)) {
+ if (lo_do_transfer(lo, READ, kaddr + offset, p->data, size, IV)) {
size = 0;
- printk(KERN_ERR "loop: transfer error block %ld\n",
- page->index);
+ printk(KERN_ERR "loop: transfer error block %ld\n",page->index);
desc->error = -EINVAL;
}
kunmap(page);
@@ -250,160 +251,345 @@
return size;
}
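
The IV that lo_send() and lo_read_actor() pass to the transfer function is just the backing-file block number at blocksize granularity, computed from the page-cache position. A small user-space sketch with arbitrarily chosen sample values:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096UL	/* assumes 4 KiB pages, as on i386 */

int main(void)
{
	unsigned long index = 3, offset = 2048, bsize = 1024;
	unsigned long IV = index * (PAGE_CACHE_SIZE / bsize) + offset / bsize;

	/* page 3, byte offset 2048, 1 KiB blocks -> block 3*4 + 2 = 14 */
	printf("IV = %lu\n", IV);
	return 0;
}
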
-static int lo_receive(struct loop_device *lo, char *data, int len, loff_t pos,
- int blksize)
+static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
+ loff_t pos)
{
- struct file *file = lo->lo_backing_file;
struct lo_read_data cookie;
read_descriptor_t desc;
+ struct file *file;
cookie.lo = lo;
- cookie.data = data;
- cookie.blksize = blksize;
+ cookie.data = bh->b_data;
+ cookie.bsize = bsize;
desc.written = 0;
- desc.count = len;
+ desc.count = bh->b_size;
desc.buf = (char*)&cookie;
desc.error = 0;
+ spin_lock_irq(&lo->lo_lock);
+ file = lo->lo_backing_file;
+ spin_unlock_irq(&lo->lo_lock);
do_generic_file_read(file, &pos, &desc, lo_read_actor);
return desc.error;
}
-static void do_lo_request(request_queue_t * q)
+static inline int loop_get_bs(struct loop_device *lo)
+{
+ int bs = 0;
+
+ if (blksize_size[MAJOR(lo->lo_device)])
+ bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
+ if (!bs)
+ bs = BLOCK_SIZE;
+
+ return bs;
+}
+
+static inline unsigned long loop_get_iv(struct loop_device *lo,
+ unsigned long sector)
+{
+ int bs = loop_get_bs(lo);
+ unsigned long offset, IV;
+
+ IV = sector / (bs >> 9) + lo->lo_offset / bs;
+ offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
+ if (offset >= bs)
+ IV++;
+
+ return IV;
+}
+
+static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
{
- int block, offset, len, blksize, size;
- char *dest_addr;
- struct loop_device *lo;
- struct buffer_head *bh;
- struct request *current_request;
loff_t pos;
+ int ret;
+
+ pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
+
+ if (rw == WRITE)
+ ret = lo_send(lo, bh, loop_get_bs(lo), pos);
+ else
+ ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
+
+ return ret;
+}
+
+static void loop_put_buffer(struct buffer_head *bh)
+{
+ if (bh) {
+ kunmap(bh->b_page);
+ __free_page(bh->b_page);
+ kmem_cache_free(bh_cachep, bh);
+ }
+}
+
+/*
+ * Add buffer_head to back of pending list
+ */
+static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&lo->lo_lock, flags);
+ if (lo->lo_bhtail) {
+ lo->lo_bhtail->b_reqnext = bh;
+ lo->lo_bhtail = bh;
+ } else
+ lo->lo_bh = lo->lo_bhtail = bh;
+ spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+ up(&lo->lo_bh_mutex);
+}
+
+/*
+ * Grab first pending buffer
+ */
+static struct buffer_head *loop_get_bh(struct loop_device *lo)
+{
+ struct buffer_head *bh;
+
+ spin_lock_irq(&lo->lo_lock);
+ if ((bh = lo->lo_bh)) {
+ if (bh == lo->lo_bhtail)
+ lo->lo_bhtail = NULL;
+ lo->lo_bh = bh->b_reqnext;
+ bh->b_reqnext = NULL;
+ }
+ spin_unlock_irq(&lo->lo_lock);
+
+ return bh;
+}
+
+/*
+ * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
+ * and lo->transfer stuff has already been done. if not, it was a READ
+ * so queue it for the loop thread and let it do the transfer out of
+ * b_end_io context (we don't want to do decrypt of a page with irqs
+ * disabled)
+ */
+static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
+{
+ struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
+
+ if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
+ struct buffer_head *rbh = bh->b_private;
+
+ rbh->b_end_io(rbh, uptodate);
+ if (atomic_dec_and_test(&lo->lo_pending))
+ up(&lo->lo_bh_mutex);
+ loop_put_buffer(bh);
+ } else
+ loop_add_bh(lo, bh);
+}
+
+static struct buffer_head *loop_get_buffer(struct loop_device *lo,
+ struct buffer_head *rbh)
+{
+ struct buffer_head *bh;
+
+ do {
+ bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER);
+ if (bh)
+ break;
+
+ run_task_queue(&tq_disk);
+ schedule_timeout(HZ);
+ } while (1);
+ memset(bh, 0, sizeof(*bh));
+
+ bh->b_size = rbh->b_size;
+ bh->b_dev = rbh->b_rdev;
+ spin_lock_irq(&lo->lo_lock);
+ bh->b_rdev = lo->lo_device;
+ spin_unlock_irq(&lo->lo_lock);
+ bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
+
+ /*
+ * easy way out, although it does waste some memory for < PAGE_SIZE
+ * blocks... if highmem bounce buffering can get away with it,
+ * so can we :-)
+ */
+ bh->b_page = alloc_page(GFP_BUFFER);
+ bh->b_data = kmap(bh->b_page);
+
+ bh->b_end_io = loop_end_io_transfer;
+ bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
+ init_waitqueue_head(&bh->b_wait);
+
+ return bh;
+}
+
+static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
+{
+ struct buffer_head *bh = NULL;
+ struct loop_device *lo;
+ unsigned long IV;
+
+ if (!buffer_locked(rbh))
+ BUG();
+
+ if (MINOR(rbh->b_rdev) >= max_loop)
+ goto out;
-repeat:
- INIT_REQUEST;
- current_request=CURRENT;
- blkdev_dequeue_request(current_request);
- if (MINOR(current_request->rq_dev) >= max_loop)
- goto error_out;
- lo = &loop_dev[MINOR(current_request->rq_dev)];
- if (!lo->lo_dentry || !lo->transfer)
- goto error_out;
- if (current_request->cmd == WRITE) {
+ lo = &loop_dev[MINOR(rbh->b_rdev)];
+ spin_lock_irq(&lo->lo_lock);
+ if (lo->lo_state != Lo_bound)
+ goto inactive;
+ atomic_inc(&lo->lo_pending);
+ spin_unlock_irq(&lo->lo_lock);
+
+ if (rw == WRITE) {
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
- goto error_out;
- } else if (current_request->cmd != READ) {
- printk(KERN_ERR "unknown loop device command (%d)?!?",
- current_request->cmd);
- goto error_out;
+ goto err;
+ } else if (rw == READA) {
+ rw = READ;
+ } else if (rw != READ) {
+ printk(KERN_ERR "loop: unknown command (%d)\n", rw);
+ goto err;
}
- dest_addr = current_request->buffer;
- len = current_request->current_nr_sectors << 9;
+#if CONFIG_HIGHMEM
+ rbh = create_bounce(rw, rbh);
+#endif
- blksize = BLOCK_SIZE;
- if (blksize_size[MAJOR(lo->lo_device)]) {
- blksize = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
- if (!blksize)
- blksize = BLOCK_SIZE;
+ /*
+ * file backed, queue for loop_thread to handle
+ */
+ if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
+ if (rw == WRITE)
+ set_bit(BH_Dirty, &rbh->b_state);
+ loop_add_bh(lo, rbh);
+ return 0;
}
- if (lo->lo_flags & LO_FLAGS_DO_BMAP)
- goto file_backed;
+ /*
+ * piggy old buffer on original, and submit for I/O
+ */
+ bh = loop_get_buffer(lo, rbh);
+ bh->b_private = rbh;
+ IV = loop_get_iv(lo, bh->b_rsector);
+ if (rw == WRITE) {
+ set_bit(BH_Dirty, &bh->b_state);
+ if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data, bh->b_size, IV))
+ goto err;
+ }
- if (blksize < 512) {
- block = current_request->sector * (512/blksize);
- offset = 0;
+ generic_make_request(rw, bh);
+ return 0;
+
+err:
+ if (atomic_dec_and_test(&lo->lo_pending))
+ up(&lo->lo_bh_mutex);
+ loop_put_buffer(bh);
+out:
+ buffer_IO_error(rbh);
+ return 0;
+inactive:
+ spin_unlock_irq(&lo->lo_lock);
+ goto out;
+}
+
+static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
+{
+ int ret;
+
+ /*
+ * For block backed loop, we know this is a READ
+ */
+ if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
+ int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
+
+ ret = do_bh_filebacked(lo, bh, rw);
+ bh->b_end_io(bh, !ret);
} else {
- block = current_request->sector / (blksize >> 9);
- offset = (current_request->sector % (blksize >> 9)) << 9;
- }
- block += lo->lo_offset / blksize;
- offset += lo->lo_offset % blksize;
- if (offset >= blksize) {
- block++;
- offset -= blksize;
+ struct buffer_head *rbh = bh->b_private;
+ unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
+
+ ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
+ bh->b_size, IV);
+
+ rbh->b_end_io(rbh, !ret);
+ loop_put_buffer(bh);
}
- spin_unlock_irq(&io_request_lock);
+}
- while (len > 0) {
+/*
+ * worker thread that handles reads/writes to file backed loop devices,
+ * to avoid blocking in our make_request_fn. it also does loop decrypting
+ * on reads for block backed loop, as that is too heavy to do from
+ * b_end_io context where irqs may be disabled.
+ */
+static int loop_thread(void *data)
+{
+ struct loop_device *lo = data;
+ struct buffer_head *bh;
- size = blksize - offset;
- if (size > len)
- size = len;
+ daemonize();
+ exit_files(current);
- bh = getblk(lo->lo_device, block, blksize);
- if (!bh) {
- printk(KERN_ERR "loop: device %s: getblk(-, %d, %d) returned NULL",
- kdevname(lo->lo_device),
- block, blksize);
- goto error_out_lock;
- }
- if (!buffer_uptodate(bh) && ((current_request->cmd == READ) ||
- (offset || (len < blksize)))) {
- ll_rw_block(READ, 1, &bh);
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh)) {
- brelse(bh);
- goto error_out_lock;
- }
- }
+ sprintf(current->comm, "loop%d", lo->lo_number);
- if ((lo->transfer)(lo, current_request->cmd,
- bh->b_data + offset,
- dest_addr, size, block)) {
- printk(KERN_ERR "loop: transfer error block %d\n",
- block);
- brelse(bh);
- goto error_out_lock;
- }
+ spin_lock_irq(&current->sigmask_lock);
+ sigfillset(&current->blocked);
+ flush_signals(current);
+ spin_unlock_irq(&current->sigmask_lock);
+
+ current->policy = SCHED_OTHER;
+ current->nice = -20;
+
+ spin_lock_irq(&lo->lo_lock);
+ lo->lo_state = Lo_bound;
+ atomic_inc(&lo->lo_pending);
+ spin_unlock_irq(&lo->lo_lock);
+
+ /*
+ * up sem, we are running
+ */
+ up(&lo->lo_sem);
- if (current_request->cmd == WRITE) {
- mark_buffer_uptodate(bh, 1);
- mark_buffer_dirty(bh);
+ for (;;) {
+ down_interruptible(&lo->lo_bh_mutex);
+ /*
+ * could be upped because of tear-down, not because of
+ * pending work
+ */
+ if (!atomic_read(&lo->lo_pending))
+ break;
+
+ bh = loop_get_bh(lo);
+ if (!bh) {
+ printk("loop: missing bh\n");
+ continue;
}
- brelse(bh);
- dest_addr += size;
- len -= size;
- offset = 0;
- block++;
- }
- goto done;
+ loop_handle_bh(lo, bh);
-file_backed:
- pos = ((loff_t)current_request->sector << 9) + lo->lo_offset;
- spin_unlock_irq(&io_request_lock);
- if (current_request->cmd == WRITE) {
- if (lo_send(lo, dest_addr, len, pos, blksize))
- goto error_out_lock;
- } else {
- if (lo_receive(lo, dest_addr, len, pos, blksize))
- goto error_out_lock;
+ /*
+ * upped both for pending work and tear-down, lo_pending
+ * will hit zero then
+ */
+ if (atomic_dec_and_test(&lo->lo_pending))
+ break;
}
-done:
- spin_lock_irq(&io_request_lock);
- current_request->sector += current_request->current_nr_sectors;
- current_request->nr_sectors -= current_request->current_nr_sectors;
- list_add(&current_request->queue, &q->queue_head);
- end_request(1);
- goto repeat;
-error_out_lock:
- spin_lock_irq(&io_request_lock);
-error_out:
- list_add(&current_request->queue, &q->queue_head);
- end_request(0);
- goto repeat;
+
+ up(&lo->lo_sem);
+ return 0;
}
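
The subtlest part of the rewrite is the handshake between loop_make_request(), loop_thread() and loop_clr_fd(), built from lo_pending, lo_bh_mutex and lo_sem. The user-space sketch below models only that reference counting with POSIX primitives; every name and the three-item workload are invented, and it is an analogy rather than driver code (build with cc -pthread):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t work_sem;		/* plays lo->lo_bh_mutex */
static sem_t ready_sem;		/* plays lo->lo_sem */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* plays lo->lo_lock */
static int pending;		/* plays the atomic lo->lo_pending */

static int pending_dec_and_test(void)
{
	int zero;

	pthread_mutex_lock(&lock);
	zero = (--pending == 0);
	pthread_mutex_unlock(&lock);
	return zero;
}

static void *worker(void *unused)	/* plays loop_thread() */
{
	pthread_mutex_lock(&lock);
	pending++;			/* the thread holds one reference itself */
	pthread_mutex_unlock(&lock);
	sem_post(&ready_sem);		/* "up sem, we are running" */

	for (;;) {
		sem_wait(&work_sem);
		pthread_mutex_lock(&lock);
		if (!pending) {		/* upped for tear-down, not for work */
			pthread_mutex_unlock(&lock);
			break;
		}
		pthread_mutex_unlock(&lock);

		printf("handled one queued buffer\n");
		if (pending_dec_and_test())
			break;		/* last reference dropped during rundown */
	}
	sem_post(&ready_sem);		/* let the tear-down path continue */
	return NULL;
}

int main(void)
{
	pthread_t thr;
	int i;

	sem_init(&work_sem, 0, 0);
	sem_init(&ready_sem, 0, 0);
	pthread_create(&thr, NULL, worker, NULL);
	sem_wait(&ready_sem);		/* loop_set_fd(): down(&lo->lo_sem) */

	for (i = 0; i < 3; i++) {	/* loop_make_request(): queue some work */
		pthread_mutex_lock(&lock);
		pending++;
		pthread_mutex_unlock(&lock);
		sem_post(&work_sem);
	}

	/* loop_clr_fd(): drop the thread's own reference, wake it if idle */
	if (pending_dec_and_test())
		sem_post(&work_sem);
	sem_wait(&ready_sem);		/* wait for the thread to wind down */
	pthread_join(thr, NULL);
	return 0;
}
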
-static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
+static int loop_set_fd(struct loop_device *lo, struct file *lo_file, kdev_t dev,
+ unsigned int arg)
{
struct file *file;
struct inode *inode;
- int error;
+ kdev_t lo_device;
+ int lo_flags = 0;
+ int error;
+ int bs;
MOD_INC_USE_COUNT;
error = -EBUSY;
- if (lo->lo_dentry)
+ if (lo->lo_state != Lo_unbound)
goto out;
-
+
error = -EBADF;
file = fget(arg);
if (!file)
@@ -412,24 +598,13 @@
error = -EINVAL;
inode = file->f_dentry->d_inode;
- if (S_ISBLK(inode->i_mode)) {
- /* dentry will be wired, so... */
- error = blkdev_get(inode->i_bdev, file->f_mode,
- file->f_flags, BDEV_FILE);
-
- lo->lo_device = inode->i_rdev;
- lo->lo_flags = 0;
-
- /* Backed by a block device - don't need to hold onto
- a file structure */
- lo->lo_backing_file = NULL;
+ if (!(file->f_mode & FMODE_WRITE))
+ lo_flags |= LO_FLAGS_READ_ONLY;
- if (error)
- goto out_putf;
+ if (S_ISBLK(inode->i_mode)) {
+ lo_device = inode->i_rdev;
} else if (S_ISREG(inode->i_mode)) {
- struct address_space_operations *aops;
-
- aops = inode->i_mapping->a_ops;
+ struct address_space_operations *aops = inode->i_mapping->a_ops;
/*
* If we can't read - sorry. If we only can't write - well,
* it's going to be read-only.
@@ -439,57 +614,50 @@
goto out_putf;
if (!aops->prepare_write || !aops->commit_write)
- lo->lo_flags |= LO_FLAGS_READ_ONLY;
-
- error = get_write_access(inode);
- if (error)
- goto out_putf;
-
- /* Backed by a regular file - we need to hold onto a file
- structure for this file. Friggin' NFS can't live without
- it on write and for reading we use do_generic_file_read(),
- so... We create a new file structure based on the one
- passed to us via 'arg'. This is to avoid changing the file
- structure that the caller is using */
-
- lo->lo_device = inode->i_dev;
- lo->lo_flags |= LO_FLAGS_DO_BMAP;
-
- error = -ENFILE;
- lo->lo_backing_file = get_empty_filp();
- if (lo->lo_backing_file == NULL) {
- put_write_access(inode);
- goto out_putf;
- }
-
- lo->lo_backing_file->f_mode = file->f_mode;
- lo->lo_backing_file->f_pos = file->f_pos;
- lo->lo_backing_file->f_flags = file->f_flags;
- lo->lo_backing_file->f_owner = file->f_owner;
- lo->lo_backing_file->f_dentry = file->f_dentry;
- lo->lo_backing_file->f_vfsmnt = mntget(file->f_vfsmnt);
- lo->lo_backing_file->f_op = fops_get(file->f_op);
- lo->lo_backing_file->private_data = file->private_data;
- file_moveto(lo->lo_backing_file, file);
+ lo_flags |= LO_FLAGS_READ_ONLY;
+ lo_device = inode->i_dev;
+ lo_flags |= LO_FLAGS_DO_BMAP;
error = 0;
- }
+ } else
+ goto out_putf;
+
+ get_file(file);
- if (IS_RDONLY (inode) || is_read_only(lo->lo_device))
- lo->lo_flags |= LO_FLAGS_READ_ONLY;
+ if (IS_RDONLY (inode) || is_read_only(lo_device)
+ || !(lo_file->f_mode & FMODE_WRITE))
+ lo_flags |= LO_FLAGS_READ_ONLY;
- set_device_ro(dev, (lo->lo_flags & LO_FLAGS_READ_ONLY)!=0);
+ set_device_ro(dev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
- lo->lo_dentry = dget(file->f_dentry);
+ lo->lo_device = lo_device;
+ lo->lo_flags = lo_flags;
+ lo->lo_backing_file = file;
lo->transfer = NULL;
lo->ioctl = NULL;
figure_loop_size(lo);
+ lo->old_gfp_mask = inode->i_mapping->gfp_mask;
+ inode->i_mapping->gfp_mask = GFP_BUFFER;
+
+ bs = 0;
+ if (blksize_size[MAJOR(inode->i_rdev)])
+ bs = blksize_size[MAJOR(inode->i_rdev)][MINOR(inode->i_rdev)];
+ if (!bs)
+ bs = BLOCK_SIZE;
+
+ set_blocksize(dev, bs);
+
+ lo->lo_bh = lo->lo_bhtail = NULL;
+ kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+ down(&lo->lo_sem);
+
+ fput(file);
+ return 0;
out_putf:
fput(file);
out:
- if (error)
- MOD_DEC_USE_COUNT;
+ MOD_DEC_USE_COUNT;
return error;
}
@@ -525,27 +693,25 @@
static int loop_clr_fd(struct loop_device *lo, kdev_t dev)
{
- struct dentry *dentry = lo->lo_dentry;
+ struct file *filp = lo->lo_backing_file;
+ int gfp = lo->old_gfp_mask;
- if (!dentry)
+ if (lo->lo_state != Lo_bound)
return -ENXIO;
if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
return -EBUSY;
+ if (filp==NULL)
+ return -EINVAL;
- if (S_ISBLK(dentry->d_inode->i_mode))
- blkdev_put(dentry->d_inode->i_bdev, BDEV_FILE);
+ spin_lock_irq(&lo->lo_lock);
+ lo->lo_state = Lo_rundown;
+ if (atomic_dec_and_test(&lo->lo_pending))
+ up(&lo->lo_bh_mutex);
+ spin_unlock_irq(&lo->lo_lock);
- lo->lo_dentry = NULL;
+ down(&lo->lo_sem);
- if (lo->lo_backing_file != NULL) {
- struct file *filp = lo->lo_backing_file;
- if ((filp->f_mode & FMODE_WRITE) == 0)
- put_write_access(filp->f_dentry->d_inode);
- fput(filp);
- lo->lo_backing_file = NULL;
- } else {
- dput(dentry);
- }
+ lo->lo_backing_file = NULL;
loop_release_xfer(lo);
lo->transfer = NULL;
@@ -554,10 +720,14 @@
lo->lo_encrypt_type = 0;
lo->lo_offset = 0;
lo->lo_encrypt_key_size = 0;
+ lo->lo_flags = 0;
memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
memset(lo->lo_name, 0, LO_NAME_SIZE);
loop_sizes[lo->lo_number] = 0;
invalidate_buffers(dev);
+ filp->f_dentry->d_inode->i_mapping->gfp_mask = gfp;
+ lo->lo_state = Lo_unbound;
+ fput(filp);
MOD_DEC_USE_COUNT;
return 0;
}
@@ -571,7 +741,7 @@
if (lo->lo_encrypt_key_size && lo->lo_key_owner != current->uid &&
!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!lo->lo_dentry)
+ if (lo->lo_state != Lo_bound)
return -ENXIO;
if (copy_from_user(&info, arg, sizeof (struct loop_info)))
return -EFAULT;
@@ -608,15 +778,16 @@
static int loop_get_status(struct loop_device *lo, struct loop_info *arg)
{
struct loop_info info;
+ struct file *file = lo->lo_backing_file;
- if (!lo->lo_dentry)
+ if (lo->lo_state != Lo_bound)
return -ENXIO;
if (!arg)
return -EINVAL;
memset(&info, 0, sizeof(info));
info.lo_number = lo->lo_number;
- info.lo_device = kdev_t_to_nr(lo->lo_dentry->d_inode->i_dev);
- info.lo_inode = lo->lo_dentry->d_inode->i_ino;
+ info.lo_device = kdev_t_to_nr(file->f_dentry->d_inode->i_dev);
+ info.lo_inode = file->f_dentry->d_inode->i_ino;
info.lo_rdevice = kdev_t_to_nr(lo->lo_device);
info.lo_offset = lo->lo_offset;
info.lo_flags = lo->lo_flags;
@@ -634,7 +805,7 @@
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo;
- int dev;
+ int dev, err;
if (!inode)
return -EINVAL;
@@ -647,25 +818,36 @@
if (dev >= max_loop)
return -ENODEV;
lo = &loop_dev[dev];
+ down(&lo->lo_ctl_mutex);
switch (cmd) {
case LOOP_SET_FD:
- return loop_set_fd(lo, inode->i_rdev, arg);
+ err = loop_set_fd(lo, file, inode->i_rdev, arg);
+ break;
case LOOP_CLR_FD:
- return loop_clr_fd(lo, inode->i_rdev);
+ err = loop_clr_fd(lo, inode->i_rdev);
+ break;
case LOOP_SET_STATUS:
- return loop_set_status(lo, (struct loop_info *) arg);
+ err = loop_set_status(lo, (struct loop_info *) arg);
+ break;
case LOOP_GET_STATUS:
- return loop_get_status(lo, (struct loop_info *) arg);
- case BLKGETSIZE: /* Return device size */
- if (!lo->lo_dentry)
- return -ENXIO;
- if (!arg)
- return -EINVAL;
- return put_user(loop_sizes[lo->lo_number] << 1, (long *) arg);
+ err = loop_get_status(lo, (struct loop_info *) arg);
+ break;
+ case BLKGETSIZE:
+ if (lo->lo_state != Lo_bound) {
+ err = -ENXIO;
+ break;
+ }
+ if (!arg) {
+ err = -EINVAL;
+ break;
+ }
+ err = put_user(loop_sizes[lo->lo_number] << 1, (long *) arg);
+ break;
default:
- return lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
+ err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
}
- return 0;
+ up(&lo->lo_ctl_mutex);
+ return err;
}
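
For reference, user space drives the ioctls dispatched above roughly as follows (losetup does essentially this); the device and file paths are examples and error handling is trimmed:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/loop.h>

int main(void)
{
	int loop_fd = open("/dev/loop0", O_RDWR);
	int file_fd = open("/tmp/backing.img", O_RDWR);
	struct loop_info info;

	if (loop_fd < 0 || file_fd < 0)
		return 1;
	if (ioctl(loop_fd, LOOP_SET_FD, file_fd) < 0)	/* bind the backing file */
		return 1;

	memset(&info, 0, sizeof(info));
	strncpy(info.lo_name, "/tmp/backing.img", LO_NAME_SIZE - 1);
	ioctl(loop_fd, LOOP_SET_STATUS, &info);		/* optional: name, offset, crypto */

	/* ... /dev/loop0 is now usable as a block device ... */

	ioctl(loop_fd, LOOP_CLR_FD, 0);			/* tear the binding down again */
	close(file_fd);
	close(loop_fd);
	return 0;
}
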
static int lo_open(struct inode *inode, struct file *file)
@@ -673,7 +855,6 @@
struct loop_device *lo;
int dev, type;
-
if (!inode)
return -EINVAL;
if (MAJOR(inode->i_rdev) != MAJOR_NR) {
@@ -681,23 +862,25 @@
return -ENODEV;
}
dev = MINOR(inode->i_rdev);
- if (dev >= max_loop) {
+ if (dev >= max_loop)
return -ENODEV;
- }
+
lo = &loop_dev[dev];
+ MOD_INC_USE_COUNT;
+ down(&lo->lo_ctl_mutex);
type = lo->lo_encrypt_type;
if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
xfer_funcs[type]->lock(lo);
lo->lo_refcnt++;
- MOD_INC_USE_COUNT;
+ up(&lo->lo_ctl_mutex);
return 0;
}
static int lo_release(struct inode *inode, struct file *file)
{
struct loop_device *lo;
- int dev;
+ int dev, type;
if (!inode)
return 0;
@@ -709,17 +892,16 @@
dev = MINOR(inode->i_rdev);
if (dev >= max_loop)
return 0;
+
lo = &loop_dev[dev];
- if (lo->lo_refcnt <= 0)
- printk(KERN_ERR "lo_release: refcount(%d) <= 0\n",
- lo->lo_refcnt);
- else {
- int type = lo->lo_encrypt_type;
- --lo->lo_refcnt;
- if (xfer_funcs[type] && xfer_funcs[type]->unlock)
- xfer_funcs[type]->unlock(lo);
- MOD_DEC_USE_COUNT;
- }
+ down(&lo->lo_ctl_mutex);
+ type = lo->lo_encrypt_type;
+ --lo->lo_refcnt;
+ if (xfer_funcs[type] && xfer_funcs[type]->unlock)
+ xfer_funcs[type]->unlock(lo);
+
+ up(&lo->lo_ctl_mutex);
+ MOD_DEC_USE_COUNT;
return 0;
}
@@ -732,11 +914,8 @@
/*
* And now the modules code and kernel interface.
*/
-#ifdef MODULE
-#define loop_init init_module
MODULE_PARM(max_loop, "i");
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices (1-255)");
-#endif
int loop_register_transfer(struct loop_func_table *funcs)
{
@@ -767,88 +946,88 @@
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
-static void no_plug_device(request_queue_t *q, kdev_t device)
-{
-}
-
int __init loop_init(void)
{
int i;
- if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
- printk(KERN_WARNING "Unable to get major number %d for loop device\n",
- MAJOR_NR);
- return -EIO;
- }
- devfs_handle = devfs_mk_dir (NULL, "loop", NULL);
- devfs_register_series (devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
- MAJOR_NR, 0,
- S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
- &lo_fops, NULL);
-
if ((max_loop < 1) || (max_loop > 255)) {
- printk (KERN_WARNING "loop: invalid max_loop (must be between 1 and 255), using default (8)\n");
+ printk(KERN_WARNING "loop: invalid max_loop (must be between"
+ " 1 and 255), using default (8)\n");
max_loop = 8;
}
- printk(KERN_INFO "loop: enabling %d loop devices\n", max_loop);
-
- loop_dev = kmalloc (max_loop * sizeof(struct loop_device), GFP_KERNEL);
- if (!loop_dev) {
- printk (KERN_ERR "loop: Unable to create loop_dev\n");
- return -ENOMEM;
+ if (devfs_register_blkdev(MAJOR_NR, "loop", &lo_fops)) {
+ printk(KERN_WARNING "Unable to get major number %d for loop"
+ " device\n", MAJOR_NR);
+ return -EIO;
}
- loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
- if (!loop_sizes) {
- printk (KERN_ERR "loop: Unable to create loop_sizes\n");
- kfree (loop_dev);
- return -ENOMEM;
- }
+ devfs_handle = devfs_mk_dir(NULL, "loop", NULL);
+ devfs_register_series(devfs_handle, "%u", max_loop, DEVFS_FL_DEFAULT,
+ MAJOR_NR, 0,
+ S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
+ &lo_fops, NULL);
- loop_blksizes = kmalloc (max_loop * sizeof(int), GFP_KERNEL);
- if (!loop_blksizes) {
- printk (KERN_ERR "loop: Unable to create loop_blksizes\n");
- kfree (loop_dev);
- kfree (loop_sizes);
+ loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
+ if (!loop_dev)
return -ENOMEM;
- }
- blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
- blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), no_plug_device);
- blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
- for (i=0; i < max_loop; i++) {
- memset(&loop_dev[i], 0, sizeof(struct loop_device));
- loop_dev[i].lo_number = i;
+ loop_sizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
+ if (!loop_sizes)
+ goto out_sizes;
+
+ loop_blksizes = kmalloc(max_loop * sizeof(int), GFP_KERNEL);
+ if (!loop_blksizes)
+ goto out_blksizes;
+
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
+
+ for (i = 0; i < max_loop; i++) {
+ struct loop_device *lo = &loop_dev[i];
+ memset(lo, 0, sizeof(struct loop_device));
+ init_MUTEX(&lo->lo_ctl_mutex);
+ init_MUTEX_LOCKED(&lo->lo_sem);
+ init_MUTEX_LOCKED(&lo->lo_bh_mutex);
+ lo->lo_number = i;
+ spin_lock_init(&lo->lo_lock);
}
+
memset(loop_sizes, 0, max_loop * sizeof(int));
memset(loop_blksizes, 0, max_loop * sizeof(int));
blk_size[MAJOR_NR] = loop_sizes;
blksize_size[MAJOR_NR] = loop_blksizes;
- for (i=0; i < max_loop; i++)
- register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &lo_fops, 0);
+ for (i = 0; i < max_loop; i++)
+ register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
+ printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
return 0;
+
+out_sizes:
+ kfree(loop_dev);
+out_blksizes:
+ kfree(loop_sizes);
+ printk(KERN_ERR "loop: ran out of memory\n");
+ return -ENOMEM;
}
-#ifdef MODULE
-void cleanup_module(void)
+void loop_exit(void)
{
- devfs_unregister (devfs_handle);
- if (devfs_unregister_blkdev(MAJOR_NR, "loop") != 0)
+ devfs_unregister(devfs_handle);
+ if (devfs_unregister_blkdev(MAJOR_NR, "loop"))
printk(KERN_WARNING "loop: cannot unregister blkdev\n");
- blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
- kfree (loop_dev);
- kfree (loop_sizes);
- kfree (loop_blksizes);
+ kfree(loop_dev);
+ kfree(loop_sizes);
+ kfree(loop_blksizes);
}
-#endif
+
+module_init(loop_init);
+module_exit(loop_exit);
#ifndef MODULE
static int __init max_loop_setup(char *str)
{
- max_loop = simple_strtol(str,NULL,0);
+ max_loop = simple_strtol(str, NULL, 0);
return 1;
}