patch-2.4.4 linux/mm/shmem.c
- Lines: 1307
- Date: Wed Apr 25 14:46:21 2001
- Orig file: v2.4.3/linux/mm/shmem.c
- Orig date: Fri Mar 2 15:16:59 2001
diff -u --recursive --new-file v2.4.3/linux/mm/shmem.c linux/mm/shmem.c
@@ -1,5 +1,5 @@
/*
- * Resizable simple shmem filesystem for Linux.
+ * Resizable virtual memory filesystem for Linux.
*
* Copyright (C) 2000 Linus Torvalds.
* 2000 Transmeta Corp.
@@ -9,14 +9,12 @@
*/
/*
- * This shared memory handling is heavily based on the ramfs. It
- * extends the ramfs by the ability to use swap which would makes it a
- * completely usable filesystem.
- *
- * But read and write are not supported (yet)
- *
+ * This virtual memory filesystem is heavily based on the ramfs. It
+ * extends ramfs by the ability to use swap and honor resource limits,
+ * which makes it a completely usable filesystem.
*/
+#include <linux/config.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/devfs_fs_kernel.h>
@@ -31,7 +29,8 @@
#include <asm/uaccess.h>
-#define SHMEM_MAGIC 0x01021994
+/* This magic number is used in glibc for posix shared memory */
+#define TMPFS_MAGIC 0x01021994
#define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
#define NR_SINGLE (ENTRIES_PER_PAGE + SHMEM_NR_DIRECT)
@@ -42,52 +41,90 @@
static struct inode_operations shmem_inode_operations;
static struct file_operations shmem_dir_operations;
static struct inode_operations shmem_dir_inode_operations;
-static struct vm_operations_struct shmem_shared_vm_ops;
-static struct vm_operations_struct shmem_private_vm_ops;
+static struct inode_operations shmem_symlink_inode_operations;
+static struct vm_operations_struct shmem_vm_ops;
LIST_HEAD (shmem_inodes);
static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
+#define BLOCKS_PER_PAGE (PAGE_SIZE/512)
+
+/*
+ * shmem_recalc_inode - recalculate the size of an inode
+ *
+ * @inode: inode to recalc
+ *
+ * We have to calculate the free blocks since the mm can drop pages
+ * behind our back
+ *
+ * But we know that normally
+ * inode->i_blocks/BLOCKS_PER_PAGE ==
+ * inode->i_mapping->nrpages + info->swapped
+ *
+ * So the mm freed
+ * inode->i_blocks/BLOCKS_PER_PAGE -
+ * (inode->i_mapping->nrpages + info->swapped)
+ *
+ * It has to be called with the spinlock held.
+ */
+
+static void shmem_recalc_inode(struct inode * inode)
+{
+ unsigned long freed;
+
+ freed = (inode->i_blocks/BLOCKS_PER_PAGE) -
+ (inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
+ if (freed){
+ struct shmem_sb_info * info = &inode->i_sb->u.shmem_sb;
+ inode->i_blocks -= freed*BLOCKS_PER_PAGE;
+ spin_lock (&info->stat_lock);
+ info->free_blocks += freed;
+ spin_unlock (&info->stat_lock);
+ }
+}
+
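The accounting above leans on the invariant spelled out in the comment: i_blocks, kept in 512-byte units, should normally equal BLOCKS_PER_PAGE times the pages still in the page cache plus the pages out on swap. A minimal user-space sketch of the same arithmetic, with hypothetical values standing in for the inode fields:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define BLOCKS_PER_PAGE (PAGE_SIZE/512)	/* 8 blocks per 4K page */

int main(void)
{
	/* hypothetical inode state: 10 pages ever accounted */
	unsigned long i_blocks = 10 * BLOCKS_PER_PAGE;
	unsigned long nrpages = 6;	/* pages still in the page cache */
	unsigned long swapped = 2;	/* pages moved out to swap */

	/* pages the mm freed behind our back */
	unsigned long freed = i_blocks/BLOCKS_PER_PAGE - (nrpages + swapped);

	i_blocks -= freed * BLOCKS_PER_PAGE;	/* hand the blocks back */
	printf("freed %lu pages, i_blocks now %lu\n", freed, i_blocks);
	return 0;
}
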
static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index)
{
+ unsigned long offset;
+
if (index < SHMEM_NR_DIRECT)
return info->i_direct+index;
index -= SHMEM_NR_DIRECT;
- if (index >= ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
- return NULL;
+ offset = index % ENTRIES_PER_PAGE;
+ index /= ENTRIES_PER_PAGE;
+
+ if (index >= ENTRIES_PER_PAGE)
+ return ERR_PTR(-EFBIG);
if (!info->i_indirect) {
info->i_indirect = (swp_entry_t **) get_zeroed_page(GFP_USER);
if (!info->i_indirect)
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
- if(!(info->i_indirect[index/ENTRIES_PER_PAGE])) {
- info->i_indirect[index/ENTRIES_PER_PAGE] = (swp_entry_t *) get_zeroed_page(GFP_USER);
- if (!info->i_indirect[index/ENTRIES_PER_PAGE])
- return NULL;
+ if(!(info->i_indirect[index])) {
+ info->i_indirect[index] = (swp_entry_t *) get_zeroed_page(GFP_USER);
+ if (!info->i_indirect[index])
+ return ERR_PTR(-ENOMEM);
}
- return info->i_indirect[index/ENTRIES_PER_PAGE]+index%ENTRIES_PER_PAGE;
+ return info->i_indirect[index]+offset;
}
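shmem_swp_entry() implements a two-level lookup: the first SHMEM_NR_DIRECT entries live directly in the inode, and everything past that is spread over indirect pages holding ENTRIES_PER_PAGE entries each. A standalone sketch of the index arithmetic (the SHMEM_NR_DIRECT value below is assumed for illustration; the real one comes from the shmem header):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define ENTRIES_PER_PAGE (PAGE_SIZE/sizeof(unsigned long))
#define SHMEM_NR_DIRECT 16	/* assumed value, for illustration only */

int main(void)
{
	unsigned long index = 2000;	/* page index within the file */

	if (index < SHMEM_NR_DIRECT) {
		printf("direct slot %lu\n", index);
	} else {
		unsigned long i = index - SHMEM_NR_DIRECT;
		printf("indirect page %lu, offset %lu\n",
		       i / ENTRIES_PER_PAGE, i % ENTRIES_PER_PAGE);
	}
	return 0;
}

Indices beyond SHMEM_NR_DIRECT + ENTRIES_PER_PAGE*ENTRIES_PER_PAGE would overflow the single page of indirect pointers, which is why the function now returns ERR_PTR(-EFBIG) there instead of NULL; the same bound reappears later as s_maxbytes in shmem_read_super().
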
static int shmem_free_swp(swp_entry_t *dir, unsigned int count)
{
swp_entry_t *ptr, entry;
- struct page * page;
int freed = 0;
for (ptr = dir; ptr < dir + count; ptr++) {
if (!ptr->val)
continue;
entry = *ptr;
- swap_free (entry);
*ptr = (swp_entry_t){0};
freed++;
- if (!(page = lookup_swap_cache(entry)))
- continue;
- delete_from_swap_cache(page);
- page_cache_release(page);
+
+ /* vmscan will do the actual page freeing later.. */
+ swap_free (entry);
}
return freed;
}
@@ -98,7 +135,6 @@
* @dir: pointer to swp_entries
* @size: number of entries in dir
* @start: offset to start from
- * @inode: inode for statistics
* @freed: counter for freed pages
*
 * It frees the swap entries from dir+start until dir+size
@@ -108,7 +144,7 @@
static unsigned long
shmem_truncate_part (swp_entry_t * dir, unsigned long size,
- unsigned long start, struct inode * inode, unsigned long *freed) {
+ unsigned long start, unsigned long *freed) {
if (start > size)
return start - size;
if (dir)
@@ -117,56 +153,28 @@
return 0;
}
-/*
- * shmem_recalc_inode - recalculate the size of an inode
- *
- * @inode: inode to recalc
- *
- * We have to calculate the free blocks since the mm can drop pages
- * behind our back
- *
- * But we know that normally
- * inodes->i_blocks == inode->i_mapping->nrpages + info->swapped
- *
- * So the mm freed
- * inodes->i_blocks - (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
- */
-
-static void shmem_recalc_inode(struct inode * inode)
-{
- unsigned long freed;
-
- freed = inode->i_blocks -
- (inode->i_mapping->nrpages + inode->u.shmem_i.swapped);
- if (freed){
- struct shmem_sb_info * info = &inode->i_sb->u.shmem_sb;
- inode->i_blocks -= freed;
- spin_lock (&info->stat_lock);
- info->free_blocks += freed;
- spin_unlock (&info->stat_lock);
- }
-}
-
static void shmem_truncate (struct inode * inode)
{
int clear_base;
- unsigned long start;
+ unsigned long index, start;
unsigned long freed = 0;
- swp_entry_t **base, **ptr;
+ swp_entry_t **base, **ptr, **last;
struct shmem_inode_info * info = &inode->u.shmem_i;
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
spin_lock (&info->lock);
- start = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (index > info->max_index)
+ goto out;
- start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, start, inode, &freed);
+ start = shmem_truncate_part (info->i_direct, SHMEM_NR_DIRECT, index, &freed);
if (!(base = info->i_indirect))
- goto out;;
+ goto out;
clear_base = 1;
- for (ptr = base; ptr < base + ENTRIES_PER_PAGE; ptr++) {
+ last = base + ((info->max_index - SHMEM_NR_DIRECT + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE);
+ for (ptr = base; ptr < last; ptr++) {
if (!start) {
if (!*ptr)
continue;
@@ -176,16 +184,16 @@
continue;
}
clear_base = 0;
- start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, inode, &freed);
+ start = shmem_truncate_part (*ptr, ENTRIES_PER_PAGE, start, &freed);
}
- if (!clear_base)
- goto out;
-
- free_page ((unsigned long)base);
- info->i_indirect = 0;
+ if (clear_base) {
+ free_page ((unsigned long)base);
+ info->i_indirect = 0;
+ }
out:
+ info->max_index = index;
info->swapped -= freed;
shmem_recalc_inode(inode);
spin_unlock (&info->lock);
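The last pointer computed earlier in this function is a ceiling division: it turns the highest index ever written (minus the direct entries) into the number of indirect pages that can actually hold entries, so truncate no longer has to sweep all ENTRIES_PER_PAGE pointer slots. The idiom in isolation, with the same assumed constants as the earlier sketch:

#include <stdio.h>

#define ENTRIES_PER_PAGE 512UL	/* 4K page / 8-byte entry, assumed */
#define SHMEM_NR_DIRECT 16UL	/* assumed, as in the earlier sketch */

int main(void)
{
	unsigned long max_index = 2000;	/* one past the highest page written */
	unsigned long n = (max_index - SHMEM_NR_DIRECT + ENTRIES_PER_PAGE - 1)
				/ ENTRIES_PER_PAGE;

	printf("%lu indirect pages in use\n", n);	/* ceil(1984/512) = 4 */
	return 0;
}
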
@@ -207,34 +215,44 @@
}
/*
- * Move the page from the page cache to the swap cache
+ * Move the page from the page cache to the swap cache.
+ *
+ * The page lock prevents multiple occurrences of shmem_writepage at
+ * once. We still need to guard against racing with
+ * shmem_getpage_locked().
*/
static int shmem_writepage(struct page * page)
{
- int error;
+ int error = 0;
struct shmem_inode_info *info;
swp_entry_t *entry, swap;
+ struct inode *inode;
- info = &page->mapping->host->u.shmem_i;
+ if (!PageLocked(page))
+ BUG();
+
+ /* Only move to the swap cache if there are no other users of
+ * the page. */
+ if (atomic_read(&page->count) > 2)
+ goto out;
+
+ inode = page->mapping->host;
+ info = &inode->u.shmem_i;
swap = __get_swap_page(2);
- if (!swap.val) {
- set_page_dirty(page);
- UnlockPage(page);
- return -ENOMEM;
- }
+ error = -ENOMEM;
+ if (!swap.val)
+ goto out;
spin_lock(&info->lock);
- shmem_recalc_inode(page->mapping->host);
- entry = shmem_swp_entry (info, page->index);
- if (!entry) /* this had been allocted on page allocation */
+ entry = shmem_swp_entry(info, page->index);
+ if (IS_ERR(entry)) /* this had been allocated on page allocation */
BUG();
+ shmem_recalc_inode(page->mapping->host);
error = -EAGAIN;
- if (entry->val) {
- __swap_free(swap, 2);
- goto out;
- }
+ if (entry->val)
+ BUG();
- *entry = swap;
+ *entry = swap;
error = 0;
/* Remove the page from the page cache */
lru_cache_del(page);
@@ -243,99 +261,186 @@
/* Add it to the swap cache */
add_to_swap_cache(page, swap);
page_cache_release(page);
- set_page_dirty(page);
info->swapped++;
-out:
+
spin_unlock(&info->lock);
+out:
+ set_page_dirty(page);
UnlockPage(page);
return error;
}
/*
- * shmem_nopage - either get the page from swap or allocate a new one
+ * shmem_getpage_locked - either get the page from swap or allocate a new one
*
* If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty, since we also free the swap
 * entry: a page cannot live in both the swap and page cache.
+ *
+ * Called with the inode locked, so it cannot race with itself, but we
+ * still need to guard against racing with shmem_writepage(), which might
+ * be trying to move the page to the swap cache as we run.
*/
-struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
+static struct page * shmem_getpage_locked(struct inode * inode, unsigned long idx)
{
- unsigned long size;
- struct page * page;
- unsigned int idx;
- swp_entry_t *entry;
- struct inode * inode = vma->vm_file->f_dentry->d_inode;
struct address_space * mapping = inode->i_mapping;
struct shmem_inode_info *info;
+ struct page * page;
+ swp_entry_t *entry;
- idx = (address - vma->vm_start) >> PAGE_SHIFT;
- idx += vma->vm_pgoff;
-
- down (&inode->i_sem);
- size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- page = NOPAGE_SIGBUS;
- if ((idx >= size) && (vma->vm_mm == current->mm))
- goto out;
+ info = &inode->u.shmem_i;
- /* retry, we may have slept */
- page = __find_lock_page(mapping, idx, page_hash (mapping, idx));
+repeat:
+ page = find_lock_page(mapping, idx);
if (page)
- goto cached_page;
+ return page;
- info = &inode->u.shmem_i;
entry = shmem_swp_entry (info, idx);
- if (!entry)
- goto oom;
+ if (IS_ERR(entry))
+ return (void *)entry;
+
spin_lock (&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock (&info->lock);
+
+ /* The shmem_swp_entry() call may have blocked, and
+ * shmem_writepage may have been moving a page between the page
+ * cache and swap cache. We need to recheck the page cache
+ * under the protection of the info->lock spinlock. */
+
+ page = __find_get_page(mapping, idx, page_hash(mapping, idx));
+ if (page) {
+ if (TryLockPage(page))
+ goto wait_retry;
+ spin_unlock (&info->lock);
+ return page;
+ }
+
if (entry->val) {
unsigned long flags;
/* Look it up and read it in.. */
- page = lookup_swap_cache(*entry);
+ page = __find_get_page(&swapper_space, entry->val,
+ page_hash(&swapper_space, entry->val));
if (!page) {
+ spin_unlock (&info->lock);
lock_kernel();
swapin_readahead(*entry);
- page = read_swap_cache(*entry);
+ page = read_swap_cache_async(*entry);
unlock_kernel();
if (!page)
- goto oom;
+ return ERR_PTR(-ENOMEM);
+ wait_on_page(page);
+ if (!Page_Uptodate(page)) {
+ page_cache_release(page);
+ return ERR_PTR(-EIO);
+ }
+
+ /* Too bad we can't trust this page, because we
+ * dropped the info->lock spinlock */
+ page_cache_release(page);
+ goto repeat;
}
/* We have to do this with the page locked to prevent races */
- spin_lock (&info->lock);
+ if (TryLockPage(page))
+ goto wait_retry;
+
swap_free(*entry);
- lock_page(page);
- delete_from_swap_cache_nolock(page);
*entry = (swp_entry_t) {0};
+ delete_from_swap_cache_nolock(page);
flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1));
page->flags = flags | (1 << PG_dirty);
add_to_page_cache_locked(page, mapping, idx);
info->swapped--;
spin_unlock (&info->lock);
} else {
+ spin_unlock (&info->lock);
spin_lock (&inode->i_sb->u.shmem_sb.stat_lock);
if (inode->i_sb->u.shmem_sb.free_blocks == 0)
goto no_space;
inode->i_sb->u.shmem_sb.free_blocks--;
spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
- /* Ok, get a new page */
+
+ /* Ok, get a new page. We don't have to worry about the
+ * info->lock spinlock here: we cannot race against
+ * shmem_writepage because we have already verified that
+ * there is no page present either in memory or in the
+ * swap cache, so we are guaranteed to be populating a
+ * new shm entry. The inode semaphore we already hold
+ * is enough to make this atomic. */
page = page_cache_alloc(mapping);
if (!page)
- goto oom;
- clear_user_highpage(page, address);
- inode->i_blocks++;
+ return ERR_PTR(-ENOMEM);
+ clear_highpage(page);
+ inode->i_blocks += BLOCKS_PER_PAGE;
add_to_page_cache (page, mapping, idx);
}
+
/* We have the page */
- SetPageUptodate (page);
+ SetPageUptodate(page);
if (info->locked)
page_cache_get(page);
+ return page;
+no_space:
+ spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
+ return ERR_PTR(-ENOSPC);
-cached_page:
- UnlockPage (page);
- up(&inode->i_sem);
+wait_retry:
+ spin_unlock (&info->lock);
+ wait_on_page(page);
+ page_cache_release(page);
+ goto repeat;
+}
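The re-check in the middle of this function is the classic pattern the comments describe: any call that may block forces a fresh lookup once the lock is retaken. A user-space rendition of the same shape with a pthread mutex, purely illustrative and not taken from the kernel (build with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int cached;	/* stands in for "page present in the cache" */

/* Look up, and if missing, create -- rechecking under the lock,
 * because the blocking work below races with other threads. */
static void get_or_create(void)
{
	for (;;) {
		pthread_mutex_lock(&lock);
		if (cached) {			/* somebody beat us to it */
			pthread_mutex_unlock(&lock);
			return;
		}
		pthread_mutex_unlock(&lock);

		/* ... blocking work happens here (I/O, allocation) ... */

		pthread_mutex_lock(&lock);
		if (!cached) {			/* still absent: install ours */
			cached = 1;
			pthread_mutex_unlock(&lock);
			return;
		}
		pthread_mutex_unlock(&lock);	/* lost the race: retry */
	}
}

int main(void)
{
	get_or_create();
	printf("cached=%d\n", cached);
	return 0;
}
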
+
+static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr)
+{
+ struct address_space * mapping = inode->i_mapping;
+ int error;
+
+ *ptr = NOPAGE_SIGBUS;
+ if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE)
+ return -EFAULT;
+
+ *ptr = __find_get_page(mapping, idx, page_hash(mapping, idx));
+ if (*ptr) {
+ if (Page_Uptodate(*ptr))
+ return 0;
+ page_cache_release(*ptr);
+ }
+
+ down (&inode->i_sem);
+ /* retest: we may have slept */
+ if (inode->i_size < (loff_t) idx * PAGE_CACHE_SIZE)
+ goto sigbus;
+ *ptr = shmem_getpage_locked(inode, idx);
+ if (IS_ERR (*ptr))
+ goto failed;
+ UnlockPage(*ptr);
+ up (&inode->i_sem);
+ return 0;
+failed:
+ up (&inode->i_sem);
+ error = PTR_ERR(*ptr);
+ *ptr = NOPAGE_OOM;
+ if (error != -EFBIG)
+ *ptr = NOPAGE_SIGBUS;
+ return error;
+sigbus:
+ up (&inode->i_sem);
+ *ptr = NOPAGE_SIGBUS;
+ return -EFAULT;
+}
+
+struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int no_share)
+{
+ struct page * page;
+ unsigned int idx;
+ struct inode * inode = vma->vm_file->f_dentry->d_inode;
+
+ idx = (address - vma->vm_start) >> PAGE_SHIFT;
+ idx += vma->vm_pgoff;
+
+ if (shmem_getpage(inode, idx, &page))
+ return page;
if (no_share) {
struct page *new_page = page_cache_alloc(inode->i_mapping);
@@ -351,13 +456,45 @@
flush_page_to_ram (page);
return(page);
-no_space:
- spin_unlock (&inode->i_sb->u.shmem_sb.stat_lock);
-oom:
- page = NOPAGE_OOM;
-out:
+}
+
+void shmem_lock(struct file * file, int lock)
+{
+ struct inode * inode = file->f_dentry->d_inode;
+ struct shmem_inode_info * info = &inode->u.shmem_i;
+ struct page * page;
+ unsigned long idx, size;
+
+ if (info->locked == lock)
+ return;
+ down(&inode->i_sem);
+ info->locked = lock;
+ size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ for (idx = 0; idx < size; idx++) {
+ page = find_lock_page(inode->i_mapping, idx);
+ if (!page)
+ continue;
+ if (!lock) {
+ /* release the extra count and our reference */
+ page_cache_release(page);
+ page_cache_release(page);
+ }
+ UnlockPage(page);
+ }
up(&inode->i_sem);
- return page;
+}
+
+static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
+{
+ struct vm_operations_struct * ops;
+ struct inode *inode = file->f_dentry->d_inode;
+
+ ops = &shmem_vm_ops;
+ if (!inode->i_sb || !S_ISREG(inode->i_mode))
+ return -EACCES;
+ UPDATE_ATIME(inode);
+ vma->vm_ops = ops;
+ return 0;
}
struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
@@ -392,11 +529,13 @@
inode->i_fop = &shmem_file_operations;
break;
case S_IFDIR:
+ inode->i_nlink++;
inode->i_op = &shmem_dir_inode_operations;
inode->i_fop = &shmem_dir_operations;
break;
case S_IFLNK:
- BUG();
+ inode->i_op = &shmem_symlink_inode_operations;
+ break;
}
spin_lock (&shmem_ilock);
list_add (&inode->u.shmem_i.list, &shmem_inodes);
@@ -405,49 +544,242 @@
return inode;
}
+#ifdef CONFIG_TMPFS
+static ssize_t
+shmem_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+ loff_t pos;
+ struct page *page;
+ unsigned long written;
+ long status;
+ int err;
+
+
+ down(&inode->i_sem);
+
+ pos = *ppos;
+ err = -EINVAL;
+ if (pos < 0)
+ goto out;
+
+ err = file->f_error;
+ if (err) {
+ file->f_error = 0;
+ goto out;
+ }
+
+ written = 0;
+
+ if (file->f_flags & O_APPEND)
+ pos = inode->i_size;
+
+ /*
+ * Check whether we've reached the file size limit.
+ */
+ err = -EFBIG;
+ if (limit != RLIM_INFINITY) {
+ if (pos >= limit) {
+ send_sig(SIGXFSZ, current, 0);
+ goto out;
+ }
+ if (count > limit - pos) {
+ send_sig(SIGXFSZ, current, 0);
+ count = limit - pos;
+ }
+ }
+
+ status = 0;
+ if (count) {
+ remove_suid(inode);
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ }
+
+ while (count) {
+ unsigned long bytes, index, offset;
+ char *kaddr;
+ int deactivate = 1;
+
+ /*
+ * Try to find the page in the cache. If it isn't there,
+ * allocate a free page.
+ */
+ offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+ index = pos >> PAGE_CACHE_SHIFT;
+ bytes = PAGE_CACHE_SIZE - offset;
+ if (bytes > count) {
+ bytes = count;
+ deactivate = 0;
+ }
+
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ */
+ { volatile unsigned char dummy;
+ __get_user(dummy, buf);
+ __get_user(dummy, buf+bytes-1);
+ }
+
+ page = shmem_getpage_locked(inode, index);
+ status = PTR_ERR(page);
+ if (IS_ERR(page))
+ break;
+
+ /* We have exclusive IO access to the page.. */
+ if (!PageLocked(page)) {
+ PAGE_BUG(page);
+ }
+
+ kaddr = kmap(page);
+// can this do a truncated write? cr
+ status = copy_from_user(kaddr+offset, buf, bytes);
+ kunmap(page);
+ if (status)
+ goto fail_write;
+
+ flush_dcache_page(page);
+ if (bytes > 0) {
+ SetPageDirty(page);
+ written += bytes;
+ count -= bytes;
+ pos += bytes;
+ buf += bytes;
+ if (pos > inode->i_size)
+ inode->i_size = pos;
+ if (inode->u.shmem_i.max_index <= index)
+ inode->u.shmem_i.max_index = index+1;
+
+ }
+unlock:
+ /* Mark it unlocked again and drop the page.. */
+ UnlockPage(page);
+ if (deactivate)
+ deactivate_page(page);
+ page_cache_release(page);
+
+ if (status < 0)
+ break;
+ }
+ *ppos = pos;
+
+ err = written ? written : status;
+out:
+ up(&inode->i_sem);
+ return err;
+fail_write:
+ status = -EFAULT;
+ ClearPageUptodate(page);
+ kunmap(page);
+ goto unlock;
+}
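The write loop above walks the user buffer one page-cache page at a time: offset is the position within the current page, index the page number, and bytes the largest chunk that fits before the next page boundary. A user-space sketch of that chunking, assuming a 4K PAGE_CACHE_SIZE:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096UL

int main(void)
{
	unsigned long pos = 5000;	/* hypothetical file position */
	unsigned long count = 10000;	/* bytes left to write */

	while (count) {
		unsigned long offset = pos & (PAGE_CACHE_SIZE - 1);
		unsigned long index = pos >> 12;	/* PAGE_CACHE_SHIFT, assumed 12 */
		unsigned long bytes = PAGE_CACHE_SIZE - offset;

		if (bytes > count)
			bytes = count;
		printf("page %lu: copy %lu bytes at offset %lu\n",
		       index, bytes, offset);
		pos += bytes;
		count -= bytes;
	}
	return 0;
}
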
+
+static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+ unsigned long index, offset;
+ int nr = 1;
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ offset = *ppos & ~PAGE_CACHE_MASK;
+
+ while (nr && desc->count) {
+ struct page *page;
+ unsigned long end_index, nr;
+
+ end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ if (index > end_index)
+ break;
+ nr = PAGE_CACHE_SIZE;
+ if (index == end_index) {
+ nr = inode->i_size & ~PAGE_CACHE_MASK;
+ if (nr <= offset)
+ break;
+ }
+
+ nr = nr - offset;
+
+ if ((desc->error = shmem_getpage(inode, index, &page)))
+ break;
+
+ if (mapping->i_mmap_shared != NULL)
+ flush_dcache_page(page);
+
+ /*
+ * Ok, we have the page, and it's up-to-date, so
+ * now we can copy it to user space...
+ *
+ * The actor routine returns how many bytes were actually used..
+ * NOTE! This may not be the same as how much of a user buffer
+ * we filled up (we may be padding etc), so we can only update
+ * "pos" here (the actor routine has to update the user buffer
+ * pointers and the remaining count).
+ */
+ nr = file_read_actor(desc, page, offset, nr);
+ offset += nr;
+ index += offset >> PAGE_CACHE_SHIFT;
+ offset &= ~PAGE_CACHE_MASK;
+
+ page_cache_release(page);
+ }
+
+ *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+ UPDATE_ATIME(inode);
+}
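The end_index/nr computation at the top of the loop decides how many bytes of a given page are valid: a full page below the last one, a partial count on the page containing i_size, and nothing beyond it. The same logic in a standalone sketch, again assuming 4K pages:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096UL
#define PAGE_CACHE_MASK (~(PAGE_CACHE_SIZE - 1))

/* How many bytes of page 'index' are valid in a file of size i_size?
 * Mirrors the end_index/nr computation in do_shmem_file_read(). */
static unsigned long bytes_in_page(unsigned long long i_size,
				   unsigned long index)
{
	unsigned long end_index = i_size / PAGE_CACHE_SIZE;
	unsigned long nr = PAGE_CACHE_SIZE;

	if (index > end_index)
		return 0;
	if (index == end_index)
		nr = i_size & ~PAGE_CACHE_MASK;	/* partial last page */
	return nr;
}

int main(void)
{
	unsigned long long size = 10000;	/* hypothetical file size */
	unsigned long idx;

	for (idx = 0; idx < 4; idx++)
		printf("page %lu: %lu bytes\n", idx, bytes_in_page(size, idx));
	return 0;
}
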
+
+static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
+{
+ ssize_t retval;
+
+ retval = -EFAULT;
+ if (access_ok(VERIFY_WRITE, buf, count)) {
+ retval = 0;
+
+ if (count) {
+ read_descriptor_t desc;
+
+ desc.written = 0;
+ desc.count = count;
+ desc.buf = buf;
+ desc.error = 0;
+ do_shmem_file_read(filp, ppos, &desc);
+
+ retval = desc.written;
+ if (!retval)
+ retval = desc.error;
+ }
+ }
+ return retval;
+}
+
static int shmem_statfs(struct super_block *sb, struct statfs *buf)
{
- buf->f_type = SHMEM_MAGIC;
+ buf->f_type = TMPFS_MAGIC;
buf->f_bsize = PAGE_CACHE_SIZE;
spin_lock (&sb->u.shmem_sb.stat_lock);
- if (sb->u.shmem_sb.max_blocks != ULONG_MAX ||
- sb->u.shmem_sb.max_inodes != ULONG_MAX) {
+ if (sb->u.shmem_sb.max_blocks == ULONG_MAX) {
+ /*
+ * This is only a guesstimate and is not honoured.
+ * We need it to satisfy programs which like to
+ * test the free space of a file system.
+ */
+ buf->f_bavail = buf->f_bfree = nr_free_pages() + nr_swap_pages + atomic_read(&buffermem_pages);
+ buf->f_blocks = buf->f_bfree + ULONG_MAX - sb->u.shmem_sb.free_blocks;
+ } else {
buf->f_blocks = sb->u.shmem_sb.max_blocks;
buf->f_bavail = buf->f_bfree = sb->u.shmem_sb.free_blocks;
- buf->f_files = sb->u.shmem_sb.max_inodes;
- buf->f_ffree = sb->u.shmem_sb.free_inodes;
}
+ buf->f_files = sb->u.shmem_sb.max_inodes;
+ buf->f_ffree = sb->u.shmem_sb.free_inodes;
spin_unlock (&sb->u.shmem_sb.stat_lock);
buf->f_namelen = 255;
return 0;
}
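For an unlimited mount (max_blocks == ULONG_MAX) the numbers reported above are, as the comment admits, only a guess: f_bfree is everything the machine could still hand to tmpfs (free RAM plus swap plus reclaimable buffer pages), and f_blocks adds back what the mount has already consumed, i.e. ULONG_MAX minus the free_blocks counter. A toy illustration with made-up figures:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	/* made-up stand-ins for nr_free_pages(), nr_swap_pages,
	 * buffermem_pages and the superblock's free_blocks counter */
	unsigned long nr_free = 3000, nr_swap = 8000, buffermem = 500;
	unsigned long free_blocks = ULONG_MAX - 42;	/* 42 pages in use */

	unsigned long bfree = nr_free + nr_swap + buffermem;
	unsigned long blocks = bfree + (ULONG_MAX - free_blocks);

	printf("f_bfree=%lu f_blocks=%lu\n", bfree, blocks);
	return 0;
}
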
-void shmem_lock(struct file * file, int lock)
-{
- struct inode * inode = file->f_dentry->d_inode;
- struct shmem_inode_info * info = &inode->u.shmem_i;
- struct page * page;
- unsigned long idx, size;
-
- if (info->locked == lock)
- return;
- down(&inode->i_sem);
- info->locked = lock;
- size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- for (idx = 0; idx < size; idx++) {
- page = find_lock_page(inode->i_mapping, idx);
- if (!page)
- continue;
- if (!lock) {
- /* release the extra count and our reference */
- page_cache_release(page);
- page_cache_release(page);
- }
- UnlockPage(page);
- }
- up(&inode->i_sem);
-}
-
/*
* Lookup the data. This is trivial - if the dentry didn't already
* exist, we know it is negative.
@@ -466,6 +798,7 @@
struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev);
int error = -ENOSPC;
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode) {
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
@@ -476,7 +809,12 @@
static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode)
{
- return shmem_mknod(dir, dentry, mode | S_IFDIR, 0);
+ int error;
+
+ if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
+ return error;
+ dir->i_nlink++;
+ return 0;
}
static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
@@ -494,6 +832,7 @@
if (S_ISDIR(inode->i_mode))
return -EPERM;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
inode->i_nlink++;
atomic_inc(&inode->i_count); /* New dentry reference */
dget(dentry); /* Extra pinning count for the created dentry */
@@ -534,26 +873,24 @@
return 1;
}
-/*
- * This works for both directories and regular files.
- * (non-directories will always have empty subdirs)
- */
static int shmem_unlink(struct inode * dir, struct dentry *dentry)
{
- int retval = -ENOTEMPTY;
+ struct inode *inode = dentry->d_inode;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ inode->i_nlink--;
+ dput(dentry); /* Undo the count from "create" - this does all the work */
+ return 0;
+}
- if (shmem_empty(dentry)) {
- struct inode *inode = dentry->d_inode;
+static int shmem_rmdir(struct inode * dir, struct dentry *dentry)
+{
+ if (!shmem_empty(dentry))
+ return -ENOTEMPTY;
- inode->i_nlink--;
- dput(dentry); /* Undo the count from "create" - this does all the work */
- retval = 0;
- }
- return retval;
+ dir->i_nlink--;
+ return shmem_unlink(dir, dentry);
}
-#define shmem_rmdir shmem_unlink
-
/*
* The VFS layer already does all the dentry stuff for rename,
* we just have to decrement the usage count for the target if
@@ -567,27 +904,77 @@
if (shmem_empty(new_dentry)) {
struct inode *inode = new_dentry->d_inode;
if (inode) {
+ inode->i_ctime = CURRENT_TIME;
inode->i_nlink--;
dput(new_dentry);
}
error = 0;
+ old_dentry->d_inode->i_ctime = old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
}
return error;
}
-static int shmem_mmap(struct file * file, struct vm_area_struct * vma)
+static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
{
- struct vm_operations_struct * ops;
- struct inode *inode = file->f_dentry->d_inode;
+ int error;
+ int len;
+ struct inode *inode;
+ struct page *page;
+ char *kaddr;
- ops = &shmem_private_vm_ops;
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
- ops = &shmem_shared_vm_ops;
- if (!inode->i_sb || !S_ISREG(inode->i_mode))
- return -EACCES;
- UPDATE_ATIME(inode);
- vma->vm_ops = ops;
+ error = shmem_mknod(dir, dentry, S_IFLNK | S_IRWXUGO, 0);
+ if (error)
+ return error;
+
+ len = strlen(symname);
+ if (len > PAGE_SIZE)
+ return -ENAMETOOLONG;
+
+ inode = dentry->d_inode;
+ down(&inode->i_sem);
+ page = shmem_getpage_locked(inode, 0);
+ if (IS_ERR(page))
+ goto fail;
+ kaddr = kmap(page);
+ memcpy(kaddr, symname, len);
+ kunmap(page);
+ inode->i_size = len;
+ SetPageDirty(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ up(&inode->i_sem);
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
return 0;
+fail:
+ up(&inode->i_sem);
+ return PTR_ERR(page);
+}
+
+static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
+{
+ struct page * page;
+ int res = shmem_getpage(dentry->d_inode, 0, &page);
+
+ if (res)
+ return res;
+
+ res = vfs_readlink(dentry,buffer,buflen, kmap(page));
+ kunmap(page);
+ page_cache_release(page);
+ return res;
+}
+
+static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct page * page;
+ int res = shmem_getpage(dentry->d_inode, 0, &page);
+ if (res)
+ return res;
+
+ res = vfs_follow_link(nd, kmap(page));
+ kunmap(page);
+ page_cache_release(page);
+ return res;
}
static int shmem_parse_options(char *options, int *mode, unsigned long * blocks, unsigned long *inodes)
@@ -600,16 +987,24 @@
for ( ; this_char; this_char = strtok(NULL,",")) {
if ((value = strchr(this_char,'=')) != NULL)
*value++ = 0;
- if (!strcmp(this_char,"nr_blocks")) {
+ if (!strcmp(this_char,"size")) {
+ unsigned long long size;
if (!value || !*value || !blocks)
return 1;
- *blocks = simple_strtoul(value,&value,0);
+ size = memparse(value,&value);
+ if (*value)
+ return 1;
+ *blocks = size >> PAGE_CACHE_SHIFT;
+ } else if (!strcmp(this_char,"nr_blocks")) {
+ if (!value || !*value || !blocks)
+ return 1;
+ *blocks = memparse(value,&value);
if (*value)
return 1;
} else if (!strcmp(this_char,"nr_inodes")) {
if (!value || !*value || !inodes)
return 1;
- *inodes = simple_strtoul(value,&value,0);
+ *inodes = memparse(value,&value);
if (*value)
return 1;
} else if (!strcmp(this_char,"mode")) {
@@ -622,9 +1017,42 @@
else
return 1;
}
+ return 0;
+}
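The switch from simple_strtoul() to memparse() is what lets the options accept the usual k/m/g suffixes, so size=32m now works. A rough user-space stand-in for memparse(), written here only to show the suffix handling and not copied from the kernel:

#include <stdio.h>
#include <stdlib.h>

/* User-space sketch of the kernel's memparse(): parse a number
 * with an optional k/m/g suffix. */
static unsigned long long my_memparse(const char *s, char **retptr)
{
	unsigned long long v = strtoull(s, retptr, 0);

	switch (**retptr) {
	case 'g': case 'G': v <<= 10;	/* fall through */
	case 'm': case 'M': v <<= 10;	/* fall through */
	case 'k': case 'K': v <<= 10; (*retptr)++;
	}
	return v;
}

int main(void)
{
	char *end;
	unsigned long long size = my_memparse("32m", &end);

	/* tmpfs converts bytes to pages of PAGE_CACHE_SIZE */
	printf("size=%llu bytes -> %llu pages of 4K\n", size, size >> 12);
	return 0;
}

With that in place, size=32m parses to 33554432 bytes, which shmem_parse_options() turns into pages by shifting right by PAGE_CACHE_SHIFT.
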
+static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
+{
+ int error;
+ unsigned long max_blocks, blocks;
+ unsigned long max_inodes, inodes;
+ struct shmem_sb_info *info = &sb->u.shmem_sb;
+
+ if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes))
+ return -EINVAL;
+
+ spin_lock(&info->stat_lock);
+ blocks = info->max_blocks - info->free_blocks;
+ inodes = info->max_inodes - info->free_inodes;
+ error = -EINVAL;
+ if (max_blocks < blocks)
+ goto out;
+ if (max_inodes < inodes)
+ goto out;
+ error = 0;
+ info->max_blocks = max_blocks;
+ info->free_blocks = max_blocks - blocks;
+ info->max_inodes = max_inodes;
+ info->free_inodes = max_inodes - inodes;
+out:
+ spin_unlock(&info->stat_lock);
+ return error;
+}
+
+int shmem_sync_file(struct file * file, struct dentry *dentry, int datasync)
+{
return 0;
}
+#endif
static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent)
{
@@ -634,19 +1062,22 @@
unsigned long inodes = ULONG_MAX; /* unlimited */
int mode = S_IRWXUGO | S_ISVTX;
+#ifdef CONFIG_TMPFS
if (shmem_parse_options (data, &mode, &blocks, &inodes)) {
- printk(KERN_ERR "shmem fs invalid option\n");
+ printk(KERN_ERR "tmpfs invalid option\n");
return NULL;
}
+#endif
spin_lock_init (&sb->u.shmem_sb.stat_lock);
sb->u.shmem_sb.max_blocks = blocks;
sb->u.shmem_sb.free_blocks = blocks;
sb->u.shmem_sb.max_inodes = inodes;
sb->u.shmem_sb.free_inodes = inodes;
+ sb->s_maxbytes = (unsigned long long)(SHMEM_NR_DIRECT + (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)) << PAGE_CACHE_SHIFT;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
- sb->s_magic = SHMEM_MAGIC;
+ sb->s_magic = TMPFS_MAGIC;
sb->s_op = &shmem_ops;
inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
if (!inode)
@@ -661,103 +1092,108 @@
return sb;
}
-static int shmem_remount_fs (struct super_block *sb, int *flags, char *data)
-{
- int error;
- unsigned long max_blocks, blocks;
- unsigned long max_inodes, inodes;
- struct shmem_sb_info *info = &sb->u.shmem_sb;
- if (shmem_parse_options (data, NULL, &max_blocks, &max_inodes))
- return -EINVAL;
-
- spin_lock(&info->stat_lock);
- blocks = info->max_blocks - info->free_blocks;
- inodes = info->max_inodes - info->free_inodes;
- error = -EINVAL;
- if (max_blocks < blocks)
- goto out;
- if (max_inodes < inodes)
- goto out;
- error = 0;
- info->max_blocks = max_blocks;
- info->free_blocks = max_blocks - blocks;
- info->max_inodes = max_inodes;
- info->free_inodes = max_inodes - inodes;
-out:
- spin_unlock(&info->stat_lock);
- return error;
-}
static struct address_space_operations shmem_aops = {
writepage: shmem_writepage
};
static struct file_operations shmem_file_operations = {
- mmap: shmem_mmap
+ mmap: shmem_mmap,
+#ifdef CONFIG_TMPFS
+ read: shmem_file_read,
+ write: shmem_file_write,
+ fsync: shmem_sync_file,
+#endif
};
static struct inode_operations shmem_inode_operations = {
truncate: shmem_truncate,
};
+static struct inode_operations shmem_symlink_inode_operations = {
+ truncate: shmem_truncate,
+#ifdef CONFIG_TMPFS
+ readlink: shmem_readlink,
+ follow_link: shmem_follow_link,
+#endif
+};
+
static struct file_operations shmem_dir_operations = {
read: generic_read_dir,
readdir: dcache_readdir,
+#ifdef CONFIG_TMPFS
+ fsync: shmem_sync_file,
+#endif
};
static struct inode_operations shmem_dir_inode_operations = {
+#ifdef CONFIG_TMPFS
create: shmem_create,
lookup: shmem_lookup,
link: shmem_link,
unlink: shmem_unlink,
+ symlink: shmem_symlink,
mkdir: shmem_mkdir,
rmdir: shmem_rmdir,
mknod: shmem_mknod,
rename: shmem_rename,
+#endif
};
static struct super_operations shmem_ops = {
+#ifdef CONFIG_TMPFS
statfs: shmem_statfs,
remount_fs: shmem_remount_fs,
+#endif
delete_inode: shmem_delete_inode,
put_inode: force_delete,
};
-static struct vm_operations_struct shmem_private_vm_ops = {
- nopage: shmem_nopage,
-};
-
-static struct vm_operations_struct shmem_shared_vm_ops = {
+static struct vm_operations_struct shmem_vm_ops = {
nopage: shmem_nopage,
};
+#ifdef CONFIG_TMPFS
+/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
+static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
+#else
+static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
+#endif
static int __init init_shmem_fs(void)
{
int error;
struct vfsmount * res;
+ if ((error = register_filesystem(&tmpfs_fs_type))) {
+ printk (KERN_ERR "Could not register tmpfs\n");
+ return error;
+ }
+#ifdef CONFIG_TMPFS
if ((error = register_filesystem(&shmem_fs_type))) {
- printk (KERN_ERR "Could not register shmem fs\n");
+ printk (KERN_ERR "Could not register shm fs\n");
return error;
}
-
- res = kern_mount(&shmem_fs_type);
+ devfs_mk_dir (NULL, "shm", NULL);
+#endif
+ res = kern_mount(&tmpfs_fs_type);
if (IS_ERR (res)) {
- printk (KERN_ERR "could not kern_mount shmem fs\n");
- unregister_filesystem(&shmem_fs_type);
+ printk (KERN_ERR "could not kern_mount tmpfs\n");
+ unregister_filesystem(&tmpfs_fs_type);
return PTR_ERR(res);
}
- devfs_mk_dir (NULL, "shm", NULL);
return 0;
}
static void __exit exit_shmem_fs(void)
{
+#ifdef CONFIG_TMPFS
unregister_filesystem(&shmem_fs_type);
+#endif
+ unregister_filesystem(&tmpfs_fs_type);
}
module_init(init_shmem_fs)
@@ -853,7 +1289,7 @@
this.name = name;
this.len = strlen(name);
this.hash = 0; /* will go */
- root = shmem_fs_type.kern_mnt->mnt_root;
+ root = tmpfs_fs_type.kern_mnt->mnt_root;
dentry = d_alloc(root, &this);
if (!dentry)
goto out;
@@ -870,7 +1306,8 @@
d_instantiate(dentry, inode);
dentry->d_inode->i_size = size;
- file->f_vfsmnt = mntget(shmem_fs_type.kern_mnt);
+ shmem_truncate(inode);
+ file->f_vfsmnt = mntget(tmpfs_fs_type.kern_mnt);
file->f_dentry = dentry;
file->f_op = &shmem_file_operations;
file->f_mode = FMODE_WRITE | FMODE_READ;
@@ -901,6 +1338,8 @@
if (vma->vm_file)
fput (vma->vm_file);
vma->vm_file = file;
- vma->vm_ops = &shmem_shared_vm_ops;
+ vma->vm_ops = &shmem_vm_ops;
return 0;
}
+
+EXPORT_SYMBOL(shmem_file_setup);