patch-2.4.8 linux/fs/buffer.c
- Lines: 396
- Date: Fri Aug 10 16:35:26 2001
- Orig file: v2.4.7/linux/fs/buffer.c
- Orig date: Wed Jul 25 17:10:24 2001
diff -u --recursive --new-file v2.4.7/linux/fs/buffer.c linux/fs/buffer.c
@@ -113,19 +113,17 @@
*/
union bdflush_param {
struct {
- int nfract; /* Percentage of buffer cache dirty to
- activate bdflush */
- int ndirty; /* Maximum number of dirty blocks to write out per
- wake-cycle */
- int nrefill; /* Number of clean buffers to try to obtain
- each time we call refill */
- int dummy1; /* unused */
- int interval; /* jiffies delay between kupdate flushes */
- int age_buffer; /* Time for normal buffer to age before we flush it */
- int nfract_sync; /* Percentage of buffer cache dirty to
- activate bdflush synchronously */
- int dummy2; /* unused */
- int dummy3; /* unused */
+ int nfract; /* Percentage of buffer cache dirty to
+ activate bdflush */
+ int dummy1; /* old "ndirty" */
+ int dummy2; /* old "nrefill" */
+ int dummy3; /* unused */
+ int interval; /* jiffies delay between kupdate flushes */
+ int age_buffer; /* Time for normal buffer to age before we flush it */
+ int nfract_sync;/* Percentage of buffer cache dirty to
+ activate bdflush synchronously */
+ int dummy4; /* unused */
+ int dummy5; /* unused */
} b_un;
unsigned int data[N_PARAM];
} bdf_prm = {{30, 64, 64, 256, 5*HZ, 30*HZ, 60, 0, 0}};
@@ -184,34 +182,30 @@
/*
* The buffers have been marked clean and locked. Just submit the dang
* things..
- *
- * We'll wait for the first one of them - "sync" is not exactly
- * performance-critical, and this makes us not hog the IO subsystem
- * completely, while still allowing for a fair amount of concurrent IO.
*/
static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
- struct buffer_head *wait = *array;
- get_bh(wait);
do {
struct buffer_head * bh = *array++;
bh->b_end_io = end_buffer_io_sync;
submit_bh(WRITE, bh);
} while (--count);
- wait_on_buffer(wait);
- put_bh(wait);
}
+/*
+ * Write some buffers from the head of the dirty queue.
+ *
+ * This must be called with the LRU lock held, and will
+ * return without it!
+ */
#define NRSYNC (32)
-static void write_unlocked_buffers(kdev_t dev)
+static int write_some_buffers(kdev_t dev)
{
struct buffer_head *next;
struct buffer_head *array[NRSYNC];
unsigned int count;
int nr;
-repeat:
- spin_lock(&lru_list_lock);
next = lru_list[BUF_DIRTY];
nr = nr_buffers_type[BUF_DIRTY] * 2;
count = 0;
@@ -223,33 +217,49 @@
continue;
if (test_and_set_bit(BH_Lock, &bh->b_state))
continue;
- get_bh(bh);
if (atomic_set_buffer_clean(bh)) {
__refile_buffer(bh);
+ get_bh(bh);
array[count++] = bh;
if (count < NRSYNC)
continue;
spin_unlock(&lru_list_lock);
write_locked_buffers(array, count);
- goto repeat;
+ return -EAGAIN;
}
unlock_buffer(bh);
- put_bh(bh);
+ __refile_buffer(bh);
}
spin_unlock(&lru_list_lock);
if (count)
write_locked_buffers(array, count);
+ return 0;
}
-static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+/*
+ * Write out all buffers on the dirty list.
+ */
+static void write_unlocked_buffers(kdev_t dev)
+{
+ do {
+ spin_lock(&lru_list_lock);
+ } while (write_some_buffers(dev));
+ run_task_queue(&tq_disk);
+}
+
+/*
+ * Wait for a buffer on the proper list.
+ *
+ * This must be called with the LRU lock held, and
+ * will return with it released.
+ */
+static int wait_for_buffers(kdev_t dev, int index, int refile)
{
struct buffer_head * next;
int nr;
-repeat:
- spin_lock(&lru_list_lock);
next = lru_list[index];
nr = nr_buffers_type[index] * 2;
while (next && --nr >= 0) {
@@ -268,12 +278,26 @@
spin_unlock(&lru_list_lock);
wait_on_buffer (bh);
put_bh(bh);
- goto repeat;
+ return -EAGAIN;
}
spin_unlock(&lru_list_lock);
return 0;
}
+static inline void wait_for_some_buffers(kdev_t dev)
+{
+ spin_lock(&lru_list_lock);
+ wait_for_buffers(dev, BUF_LOCKED, 1);
+}
+
+static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+{
+ do {
+ spin_lock(&lru_list_lock);
+ } while (wait_for_buffers(dev, index, refile));
+ return 0;
+}
+
/* Call sync_buffers with wait!=0 to ensure that the call does not
* return until all buffer writes have completed. Sync() may return
* before the writes have finished; fsync() may not.
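
[Editorial note, not part of the patch] The comments added above spell out an unusual locking contract: write_some_buffers() and wait_for_buffers() are entered with lru_list_lock held and always return with it dropped, returning -EAGAIN when the caller should re-take the lock and call again, which is exactly what the do/while loops in write_unlocked_buffers() and wait_for_locked_buffers() do. A minimal user-space model of that contract (all names invented, a pthread mutex stands in for the spinlock):

/* Illustrative model of the lock hand-off, not part of the patch. */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define BATCH 4

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int dirty_items = 10;		/* stands in for lru_list[BUF_DIRTY] */

/* Called with list_lock held; always returns with it released. */
static int write_some_items(void)
{
	int batch = 0;

	while (dirty_items > 0 && batch < BATCH) {
		dirty_items--;		/* "submit" one item */
		batch++;
	}
	pthread_mutex_unlock(&list_lock);
	if (batch)
		printf("wrote %d, %d left\n", batch, dirty_items);
	if (batch == BATCH)
		return -EAGAIN;		/* list may not be drained: retry */
	return 0;
}

static void write_all_items(void)
{
	do {
		pthread_mutex_lock(&list_lock);
	} while (write_some_items());
}

int main(void)
{
	write_all_items();
	return 0;
}
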
@@ -310,24 +334,30 @@
lock_kernel();
sync_inodes_sb(sb);
+ DQUOT_SYNC(dev);
lock_super(sb);
if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
unlock_super(sb);
- DQUOT_SYNC(dev);
unlock_kernel();
return sync_buffers(dev, 1);
}
+int fsync_no_super(kdev_t dev)
+{
+ sync_buffers(dev, 0);
+ return sync_buffers(dev, 1);
+}
+
int fsync_dev(kdev_t dev)
{
sync_buffers(dev, 0);
lock_kernel();
- sync_supers(dev);
sync_inodes(dev);
DQUOT_SYNC(dev);
+ sync_supers(dev);
unlock_kernel();
return sync_buffers(dev, 1);
@@ -767,15 +797,16 @@
/*
* We used to try various strange things. Let's not.
* We'll just try to balance dirty buffers, and possibly
- * launder some pages.
+ * launder some pages and do our best to make more memory
+ * available.
*/
static void refill_freelist(int size)
{
- balance_dirty(NODEV);
- if (free_shortage())
- page_launder(GFP_NOFS, 0);
if (!grow_buffers(size)) {
- wakeup_bdflush(1);
+ balance_dirty(NODEV);
+ page_launder(GFP_NOFS, 0);
+ wakeup_bdflush();
+ wakeup_kswapd();
current->policy |= SCHED_YIELD;
__set_current_state(TASK_RUNNING);
schedule();
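
[Editorial note, not part of the patch] The rewritten refill_freelist() inverts the old order: try the cheap path (grow_buffers()) first, and only when that fails push dirty data, launder pages, wake bdflush and kswapd, and yield so those threads can make progress before the caller retries. The general shape as a user-space sketch (helpers invented; the loop only keeps the sketch self-contained, the kernel function makes a single pass and lets its caller retry):

/* Illustrative "allocate or reclaim and yield" shape, not part of the patch. */
#include <sched.h>
#include <stdlib.h>

static void *try_cheap_alloc(size_t size)
{
	return malloc(size);		/* stands in for grow_buffers() */
}

static void kick_reclaim(void)
{
	/* stands in for balance_dirty(), page_launder(),
	 * wakeup_bdflush() and wakeup_kswapd() */
}

static void *refill(size_t size)
{
	void *p;

	while ((p = try_cheap_alloc(size)) == NULL) {
		kick_reclaim();
		sched_yield();		/* like SCHED_YIELD + schedule() */
	}
	return p;
}

int main(void)
{
	free(refill(4096));
	return 0;
}
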
@@ -1034,7 +1065,6 @@
out:
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
- touch_buffer(bh);
return bh;
}
@@ -1086,7 +1116,21 @@
if (state < 0)
return;
- wakeup_bdflush(state);
+
+ /* If we're getting into imbalance, start write-out */
+ spin_lock(&lru_list_lock);
+ write_some_buffers(dev);
+
+ /*
+ * And if we're _really_ out of balance, wait for
+ * some of the dirty/locked buffers ourselves and
+ * start bdflush.
+ * This will throttle heavy writers.
+ */
+ if (state > 0) {
+ wait_for_some_buffers(dev);
+ wakeup_bdflush();
+ }
}
static __inline__ void __mark_dirty(struct buffer_head *bh)
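
[Editorial note, not part of the patch] The new balance_dirty() does part of the write-out itself: once balance_dirty_state() reports the soft limit crossed it starts one batch of write-out, and past the hard limit it also waits on a locked buffer and wakes bdflush, throttling heavy writers to disk speed. A user-space sketch of the two-threshold decision (the 30%/60% figures mirror the nfract/nfract_sync defaults in bdf_prm above; the rest is invented):

/* Illustrative two-threshold throttle decision, not part of the patch. */
#include <stdio.h>

/* -1: in balance, 0: start async write-out, 1: also wait (throttle) */
static int dirty_state(unsigned dirty, unsigned total)
{
	unsigned soft = total * 30 / 100;	/* nfract default      */
	unsigned hard = total * 60 / 100;	/* nfract_sync default */

	if (dirty > hard)
		return 1;
	if (dirty > soft)
		return 0;
	return -1;
}

int main(void)
{
	unsigned total = 1000, dirty;

	for (dirty = 0; dirty <= total; dirty += 250)
		printf("dirty=%4u -> state %d\n",
		       dirty, dirty_state(dirty, total));
	return 0;
}
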
@@ -1187,6 +1231,7 @@
struct buffer_head * bh;
bh = getblk(dev, block, size);
+ touch_buffer(bh);
if (buffer_uptodate(bh))
return bh;
ll_rw_block(READ, 1, &bh);
@@ -2408,7 +2453,7 @@
loop = 1;
goto cleaned_buffers_try_again;
}
- wakeup_bdflush(0);
+ wakeup_bdflush();
}
return 0;
}
@@ -2532,69 +2577,11 @@
* a limited number of buffers to the disks and then go back to sleep again.
*/
-/* This is the _only_ function that deals with flushing async writes
- to disk.
- NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list
- as all dirty buffers lives _only_ in the DIRTY lru list.
- As we never browse the LOCKED and CLEAN lru lists they are infact
- completly useless. */
-static int flush_dirty_buffers(int check_flushtime)
-{
- struct buffer_head * bh, *next;
- int flushed = 0, i;
-
- restart:
- spin_lock(&lru_list_lock);
- bh = lru_list[BUF_DIRTY];
- if (!bh)
- goto out_unlock;
- for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) {
- next = bh->b_next_free;
-
- if (!buffer_dirty(bh)) {
- __refile_buffer(bh);
- continue;
- }
- if (buffer_locked(bh))
- continue;
-
- if (check_flushtime) {
- /* The dirty lru list is chronologically ordered so
- if the current bh is not yet timed out,
- then also all the following bhs
- will be too young. */
- if (time_before(jiffies, bh->b_flushtime))
- goto out_unlock;
- } else {
- if (++flushed > bdf_prm.b_un.ndirty)
- goto out_unlock;
- }
-
- /* OK, now we are committed to write it out. */
- get_bh(bh);
- spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- put_bh(bh);
-
- if (current->need_resched)
- schedule();
- goto restart;
- }
- out_unlock:
- spin_unlock(&lru_list_lock);
-
- return flushed;
-}
-
DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);
-void wakeup_bdflush(int block)
+void wakeup_bdflush(void)
{
- if (waitqueue_active(&bdflush_wait))
- wake_up_interruptible(&bdflush_wait);
-
- if (block)
- flush_dirty_buffers(0);
+ wake_up_interruptible(&bdflush_wait);
}
/*
@@ -2608,13 +2595,22 @@
static int sync_old_buffers(void)
{
lock_kernel();
- sync_supers(0);
sync_unlocked_inodes();
+ sync_supers(0);
unlock_kernel();
- flush_dirty_buffers(1);
- /* must really sync all the active I/O request to disk here */
- run_task_queue(&tq_disk);
+ for (;;) {
+ struct buffer_head *bh;
+
+ spin_lock(&lru_list_lock);
+ bh = lru_list[BUF_DIRTY];
+ if (!bh || time_before(jiffies, bh->b_flushtime))
+ break;
+ if (write_some_buffers(NODEV))
+ continue;
+ return 0;
+ }
+ spin_unlock(&lru_list_lock);
return 0;
}
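
[Editorial note, not part of the patch] The replacement for flush_dirty_buffers(1) leans on the fact that BUF_DIRTY is kept in b_flushtime order: sync_old_buffers() writes batches from the head until the head buffer is not yet due, so kupdate only touches buffers older than age_buffer. A user-space sketch of draining an age-ordered list up to "now" (names invented):

/* Illustrative flush of a time-ordered dirty list, not part of the patch. */
#include <stdio.h>
#include <time.h>

struct entry {
	time_t due;			/* stands in for bh->b_flushtime */
	struct entry *next;
};

static void flush_old(struct entry **head, time_t now)
{
	while (*head && (*head)->due <= now) {
		printf("flushing entry due at %ld\n", (long)(*head)->due);
		*head = (*head)->next;	/* "write it out" and drop it */
	}
	/* everything left is younger, because the list is time-ordered */
}

int main(void)
{
	struct entry c = { 30, NULL }, b = { 20, &c }, a = { 10, &b };
	struct entry *head = &a;

	flush_old(&head, 15);		/* only the entry due at 10 goes out */
	return 0;
}
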
@@ -2685,7 +2681,7 @@
int bdflush(void *startup)
{
struct task_struct *tsk = current;
- int flushed;
+
/*
* We have a bare-bones task_struct, and really should fill
* in a few more things so "top" and /proc/2/{exe,root,cwd}
@@ -2708,15 +2704,9 @@
for (;;) {
CHECK_EMERGENCY_SYNC
- flushed = flush_dirty_buffers(0);
-
- /*
- * If there are still a lot of dirty buffers around,
- * skip the sleep and flush some more. Otherwise, we
- * go to sleep waiting a wakeup.
- */
- if (!flushed || balance_dirty_state(NODEV) < 0) {
- run_task_queue(&tq_disk);
+ spin_lock(&lru_list_lock);
+ if (!write_some_buffers(NODEV) || balance_dirty_state(NODEV) < 0) {
+ wait_for_some_buffers(NODEV);
interruptible_sleep_on(&bdflush_wait);
}
}
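
[Editorial note, not part of the patch] The bdflush main loop now mirrors balance_dirty(): write one batch per iteration, and when a batch was not full or the dirty level is back in balance, throttle on a locked buffer and sleep on bdflush_wait until a writer wakes it. The wake/sleep part modelled in user space (a pthread condition variable stands in for bdflush_wait; all names are invented):

/* Illustrative daemon wake/sleep loop, not part of the patch. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flusher_wait = PTHREAD_COND_INITIALIZER;
static int dirty = 100;
static int stop;

/* Returns nonzero while full batches are being written (work pending). */
static int write_batch(void)
{
	int n = dirty < 16 ? dirty : 16;

	dirty -= n;
	return n == 16;
}

static void *flusher(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!stop) {
		if (!write_batch() && !stop)
			pthread_cond_wait(&flusher_wait, &lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, flusher, NULL);
	pthread_mutex_lock(&lock);
	dirty += 50;			/* a writer dirties more data...   */
	stop = 1;			/* ...and, for the sketch, asks the
					 * daemon to exit after the wakeup */
	pthread_cond_signal(&flusher_wait);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	printf("dirty left: %d\n", dirty);
	return 0;
}
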
@@ -2747,6 +2737,8 @@
complete((struct completion *)startup);
for (;;) {
+ wait_for_some_buffers(NODEV);
+
/* update interval */
interval = bdf_prm.b_un.interval;
if (interval) {