patch-2.4.19 linux-2.4.19/drivers/md/lvm-snap.c

Next file: linux-2.4.19/drivers/md/lvm.c
Previous file: linux-2.4.19/drivers/md/lvm-internal.h
Back to the patch index
Back to the overall index

diff -urN linux-2.4.18/drivers/md/lvm-snap.c linux-2.4.19/drivers/md/lvm-snap.c
@@ -2,22 +2,22 @@
  * kernel/lvm-snap.c
  *
  * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- *                    Heinz Mauelshagen, Sistina Software (persistent snapshots)
+ *               2000 - 2001 Heinz Mauelshagen, Sistina Software
  *
  * LVM snapshot driver is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2, or (at your option)
  * any later version.
- * 
+ *
  * LVM snapshot driver is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License
  * along with GNU CC; see the file COPYING.  If not, write to
  * the Free Software Foundation, 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA. 
+ * Boston, MA 02111-1307, USA.
  *
  */
 
@@ -28,52 +28,66 @@
  *    23/11/2000 - used cpu_to_le64 rather than my own macro
  *    25/01/2001 - Put LockPage back in
  *    01/02/2001 - A dropped snapshot is now set as inactive
+ *    14/02/2001 - tidied debug statements
+ *    19/02/2001 - changed rawio calls to pass in preallocated buffer_heads
+ *    26/02/2001 - introduced __brw_kiovec to remove a lot of conditional
+ *                 compiles.
+ *    07/03/2001 - fixed COW exception table not persistent on 2.2 (HM)
  *    12/03/2001 - lvm_pv_get_number changes:
  *                 o made it static
  *                 o renamed it to _pv_get_number
  *                 o pv number is returned in new uint * arg
  *                 o -1 returned on error
  *                 lvm_snapshot_fill_COW_table has a return value too.
+ *    15/10/2001 - fix snapshot alignment problem [CM]
+ *               - fix snapshot full oops (always check lv_block_exception) [CM]
  *
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
 #include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/iobuf.h>
 #include <linux/lvm.h>
+#include <linux/devfs_fs_kernel.h>
 
 
 #include "lvm-internal.h"
 
-static char *lvm_snap_version __attribute__ ((unused)) =
-   "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n";
+static char *lvm_snap_version __attribute__ ((unused)) = "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n";
 
 
 extern const char *const lvm_name;
 extern int lvm_blocksizes[];
 
 void lvm_snapshot_release(lv_t *);
+
 static int _write_COW_table_block(vg_t *vg, lv_t *lv, int idx,
-                                 const char **reason);
+				  const char **reason);
 static void _disable_snapshot(vg_t *vg, lv_t *lv);
 
 
-static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn) {
+static inline int __brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
+			       kdev_t dev, unsigned long b[], int size,
+			       lv_t *lv) {
+	return brw_kiovec(rw, nr, iovec, dev, b, size);
+}
+
+
+static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn)
+{
 	uint p;
-	for(p = 0; p < vg->pv_max; p++) {
-		if(vg->pv[p] == NULL)
+	for (p = 0; p < vg->pv_max; p++) {
+		if (vg->pv[p] == NULL)
 			continue;
 
-		if(vg->pv[p]->pv_dev == rdev)
+		if (vg->pv[p]->pv_dev == rdev)
 			break;
-
 	}
 
- 	if(p >= vg->pv_max) {
+	if (p >= vg->pv_max) {
 		/* bad news, the snapshot COW table is probably corrupt */
 		printk(KERN_ERR
 		       "%s -- _pv_get_number failed for rdev = %u\n",
@@ -85,6 +99,7 @@
 	return 0;
 }
 
+
 #define hashfn(dev,block,mask,chunk_size) \
 	((HASHDEV(dev)^((block)/(chunk_size))) & (mask))
 
@@ -129,10 +144,20 @@
 	unsigned long mask = lv->lv_snapshot_hash_mask;
 	int chunk_size = lv->lv_chunk_size;
 
+	if (!hash_table)
+		BUG();
 	hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
 	list_add(&exception->hash, hash_table);
 }
 
+/*
+ * Determine if we already have a snapshot chunk for this block.
+ * Return: 1 if the chunk already exists
+ *         0 if we need to COW this block and allocate a new chunk
+ *        -1 if the snapshot was disabled because it ran out of space
+ *
+ * We need to be holding at least a read lock on lv->lv_lock.
+ */
 int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
 			     unsigned long pe_start, lv_t * lv)
 {
@@ -142,6 +167,9 @@
 	int chunk_size = lv->lv_chunk_size;
 	lv_block_exception_t * exception;
 
+	if (!lv->lv_block_exception)
+		return -1;
+
 	pe_off = pe_start % chunk_size;
 	pe_adjustment = (*org_sector-pe_off) % chunk_size;
 	__org_start = *org_sector - pe_adjustment;
@@ -166,8 +194,8 @@
 	   or error on this snapshot --> release it */
 	invalidate_buffers(lv_snap->lv_dev);
 
-       /* wipe the snapshot since it's inconsistent now */
-       _disable_snapshot(vg, lv_snap);
+	/* wipe the snapshot since it's inconsistent now */
+	_disable_snapshot(vg, lv_snap);
 
 	for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
 		if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
@@ -186,15 +214,15 @@
 }
 
 static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks,
-					      unsigned long start,
-					      int nr_sectors,
-					      int blocksize)
+					       unsigned long start,
+					       int nr_sectors,
+					       int blocksize)
 {
 	int i, sectors_per_block, nr_blocks;
 
 	sectors_per_block = blocksize / SECTOR_SIZE;
 
-	if(start & (sectors_per_block - 1))
+	if (start & (sectors_per_block - 1))
 		return 0;
 
 	nr_blocks = nr_sectors / sectors_per_block;
@@ -245,49 +273,51 @@
 
 int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
 {
-       uint pvn;
-       int id = 0, is = lv_snap->lv_remap_ptr;
-       ulong blksize_snap;
-       lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *)
-               page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
+	int id = 0, is = lv_snap->lv_remap_ptr;
+	ulong blksize_snap;
+	lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *)
+		page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
 
-       if (is == 0)
-               return 0;
+	if (is == 0)
+		return 0;
 
 	is--;
-        blksize_snap =
-               lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
-        is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
+	blksize_snap =
+		lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
+	is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
 
 	memset(lv_COW_table, 0, blksize_snap);
 	for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
 		/* store new COW_table entry */
-               lv_block_exception_t *be = lv_snap->lv_block_exception + is;
-               if(_pv_get_number(vg, be->rdev_org, &pvn))
-                       goto bad;
+		lv_block_exception_t *be = lv_snap->lv_block_exception + is;
+		uint pvn;
 
-               lv_COW_table[id].pv_org_number = cpu_to_le64(pvn);
-               lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org);
-               if(_pv_get_number(vg, be->rdev_new, &pvn))
-                       goto bad;
+		if (_pv_get_number(vg, be->rdev_org, &pvn))
+			goto bad;
 
-               lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn);
-               lv_COW_table[id].pv_snap_rsector =
-                       cpu_to_le64(be->rsector_new);
+		lv_COW_table[id].pv_org_number = cpu_to_le64(pvn);
+		lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org);
+
+		if (_pv_get_number(vg, be->rdev_new, &pvn))
+			goto bad;
+
+		lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn);
+		lv_COW_table[id].pv_snap_rsector = cpu_to_le64(be->rsector_new);
 	}
 
-       return 0;
+	return 0;
 
  bad:
-       printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name);
-       return -1;
+	printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name);
+	return -1;
 }
 
 
 /*
  * writes a COW exception table sector to disk (HM)
+ *
+ * We need to hold a write lock on lv_snap->lv_lock.
  */
-
 int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
 {
 	int r;
@@ -305,6 +335,10 @@
  * if there is no exception storage space free any longer --> release snapshot.
  *
  * this routine gets called for each _first_ write to a physical chunk.
+ *
+ * We need to hold a write lock on lv_snap->lv_lock.  It is assumed that
+ * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block())
+ * when this function is called.
  */
 int lvm_snapshot_COW(kdev_t org_phys_dev,
 		     unsigned long org_phys_sector,
@@ -314,8 +348,10 @@
 {
 	const char * reason;
 	unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
+	unsigned long phys_start;
 	int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
-	struct kiobuf * iobuf;
+	struct kiobuf * iobuf = lv_snap->lv_iobuf;
+	unsigned long *blocks = iobuf->blocks;
 	int blksize_snap, blksize_org, min_blksize, max_blksize;
 	int max_sectors, nr_sectors;
 
@@ -345,10 +381,8 @@
 	       org_virt_sector);
 #endif
 
-	iobuf = lv_snap->lv_iobuf;
-
-	blksize_org = lvm_get_blksize(org_phys_dev);
-	blksize_snap = lvm_get_blksize(snap_phys_dev);
+	blksize_org = lvm_sectsize(org_phys_dev);
+	blksize_snap = lvm_sectsize(snap_phys_dev);
 	max_blksize = max(blksize_org, blksize_snap);
 	min_blksize = min(blksize_org, blksize_snap);
 	max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
@@ -356,6 +390,9 @@
 	if (chunk_size % (max_blksize>>9))
 		goto fail_blksize;
 
+	/* Don't change org_start, we need it to fill in the exception table */
+	phys_start = org_start;
+
 	while (chunk_size)
 	{
 		nr_sectors = min(chunk_size, max_sectors);
@@ -363,21 +400,24 @@
 
 		iobuf->length = nr_sectors << 9;
 
-		if(!lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
-						nr_sectors, blksize_org))
+		if (!lvm_snapshot_prepare_blocks(blocks, phys_start,
+						 nr_sectors, blksize_org))
 			goto fail_prepare;
 
-		if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
-			       iobuf->blocks, blksize_org) != (nr_sectors<<9))
+		if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
+				 blksize_org, lv_snap) != (nr_sectors<<9))
 			goto fail_raw_read;
 
-		if(!lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
-						nr_sectors, blksize_snap))
+		if (!lvm_snapshot_prepare_blocks(blocks, snap_start,
+						 nr_sectors, blksize_snap))
 			goto fail_prepare;
 
-		if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
-			       iobuf->blocks, blksize_snap) != (nr_sectors<<9))
+		if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
+				 blksize_snap, lv_snap) != (nr_sectors<<9))
 			goto fail_raw_write;
+
+		phys_start += nr_sectors;
+		snap_start += nr_sectors;
 	}
 
 #ifdef DEBUG_SNAPSHOT
@@ -401,24 +441,24 @@
 	return 0;
 
 	/* slow path */
- out:
+out:
 	lvm_drop_snapshot(vg, lv_snap, reason);
 	return 1;
 
- fail_out_of_space:
+fail_out_of_space:
 	reason = "out of space";
 	goto out;
- fail_raw_read:
+fail_raw_read:
 	reason = "read error";
 	goto out;
- fail_raw_write:
+fail_raw_write:
 	reason = "write error";
 	goto out;
- fail_blksize:
+fail_blksize:
 	reason = "blocksize error";
 	goto out;
 
- fail_prepare:
+fail_prepare:
 	reason = "couldn't prepare kiovec blocks "
 		"(start probably isn't block aligned)";
 	goto out;
@@ -441,8 +481,7 @@
 		struct page * page;
 
 		page = alloc_page(GFP_KERNEL);
-		if (!page)
-			goto out;
+		if (!page) goto out;
 
 		iobuf->maplist[i] = page;
 		LockPage(page);
@@ -451,7 +490,8 @@
 	iobuf->offset = 0;
 
 	err = 0;
- out:
+
+out:
 	return err;
 }
 
@@ -515,13 +555,12 @@
 	if (ret) goto out_free_kiovec;
 
 	ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_COW_table_iobuf,
-                                            PAGE_SIZE/SECTOR_SIZE);
+					     PAGE_SIZE/SECTOR_SIZE);
 	if (ret) goto out_free_both_kiovecs;
 
 	ret = lvm_snapshot_alloc_hash_table(lv_snap);
 	if (ret) goto out_free_both_kiovecs;
 
-
 out:
 	return ret;
 
@@ -534,8 +573,7 @@
 	unmap_kiobuf(lv_snap->lv_iobuf);
 	free_kiovec(1, &lv_snap->lv_iobuf);
 	lv_snap->lv_iobuf = NULL;
-	if (lv_snap->lv_snapshot_hash_table != NULL)
-		vfree(lv_snap->lv_snapshot_hash_table);
+	vfree(lv_snap->lv_snapshot_hash_table);
 	lv_snap->lv_snapshot_hash_table = NULL;
 	goto out;
 }
@@ -562,10 +600,10 @@
 	}
 	if (lv->lv_COW_table_iobuf)
 	{
-               kiobuf_wait_for_io(lv->lv_COW_table_iobuf);
-               unmap_kiobuf(lv->lv_COW_table_iobuf);
-               free_kiovec(1, &lv->lv_COW_table_iobuf);
-               lv->lv_COW_table_iobuf = NULL;
+	        kiobuf_wait_for_io(lv->lv_COW_table_iobuf);
+		unmap_kiobuf(lv->lv_COW_table_iobuf);
+		free_kiovec(1, &lv->lv_COW_table_iobuf);
+		lv->lv_COW_table_iobuf = NULL;
 	}
 }
 
@@ -577,11 +615,11 @@
 	int idx_COW_table;
 	uint pvn;
 	ulong snap_pe_start, COW_table_sector_offset,
-		COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
+	      COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
 	ulong blocks[1];
 	kdev_t snap_phys_dev;
 	lv_block_exception_t *be;
-	struct kiobuf * COW_table_iobuf = lv_snap->lv_COW_table_iobuf;
+	struct kiobuf *COW_table_iobuf = lv_snap->lv_COW_table_iobuf;
 	lv_COW_table_disk_t * lv_COW_table =
 	   ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
 
@@ -592,46 +630,47 @@
 	snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 	snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
 
-	blksize_snap = lvm_get_blksize(snap_phys_dev);
+	blksize_snap = lvm_sectsize(snap_phys_dev);
 
         COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
         idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
 
 	if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
 
-       /* sector offset into the on disk COW table */
+	/* sector offset into the on disk COW table */
 	COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
 
         /* COW table block to write next */
 	blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
 
 	/* store new COW_table entry */
-       be = lv_snap->lv_block_exception + idx;
-       if(_pv_get_number(vg, be->rdev_org, &pvn))
-               goto fail_pv_get_number;
-
-       lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn);
-       lv_COW_table[idx_COW_table].pv_org_rsector =
-               cpu_to_le64(be->rsector_org);
-       if(_pv_get_number(vg, snap_phys_dev, &pvn))
-               goto fail_pv_get_number;
-
-       lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn);
-       lv_COW_table[idx_COW_table].pv_snap_rsector =
-               cpu_to_le64(be->rsector_new);
+	be = lv_snap->lv_block_exception + idx;
+	if(_pv_get_number(vg, be->rdev_org, &pvn))
+		goto fail_pv_get_number;
+
+	lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn);
+	lv_COW_table[idx_COW_table].pv_org_rsector =
+		cpu_to_le64(be->rsector_org);
+	if(_pv_get_number(vg, snap_phys_dev, &pvn))
+		goto fail_pv_get_number;
+
+	lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn);
+	lv_COW_table[idx_COW_table].pv_snap_rsector =
+		cpu_to_le64(be->rsector_new);
 
 	COW_table_iobuf->length = blksize_snap;
+	/* COW_table_iobuf->nr_pages = 1; */
 
-	if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
-		       blocks, blksize_snap) != blksize_snap)
+	if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
+			 blocks, blksize_snap, lv_snap) != blksize_snap)
 		goto fail_raw_write;
 
-       /* initialization of next COW exception table block with zeroes */
+	/* initialization of next COW exception table block with zeroes */
 	end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
 	if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
 	{
 		/* don't go beyond the end */
-               if (idx + 1 >= lv_snap->lv_remap_end) goto out;
+		if (idx + 1 >= lv_snap->lv_remap_end) goto out;
 
 		memset(lv_COW_table, 0, blksize_snap);
 
@@ -640,24 +679,24 @@
 			idx++;
 			snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
 			snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
-			blksize_snap = lvm_get_blksize(snap_phys_dev);
+			blksize_snap = lvm_sectsize(snap_phys_dev);
 			blocks[0] = snap_pe_start >> (blksize_snap >> 10);
 		} else blocks[0]++;
 
-               if (brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
-                                 blocks, blksize_snap) !=
+		if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
+                                 blocks, blksize_snap, lv_snap) !=
                     blksize_snap)
 			goto fail_raw_write;
 	}
 
- out:
+out:
 	return 0;
 
- fail_raw_write:
+fail_raw_write:
 	*reason = "write error";
 	return 1;
 
- fail_pv_get_number:
+fail_pv_get_number:
 	*reason = "_pv_get_number failed";
 	return 1;
 }
@@ -681,5 +720,3 @@
 		       lvm_name, err);
 	}
 }
-
-MODULE_LICENSE("GPL");

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)