Index: Documentation/filesystems/ntfs.txt =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/Documentation/filesystems/ntfs.txt (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/Documentation/filesystems/ntfs.txt (mode:100644) @@ -21,7 +21,7 @@ ======== Linux-NTFS comes with a number of user-space programs known as ntfsprogs. -These include mkntfs, a full-featured ntfs file system format utility, +These include mkntfs, a full-featured ntfs filesystem format utility, ntfsundelete used for recovering files that were unintentionally deleted from an NTFS volume and ntfsresize which is used to resize an NTFS partition. See the web site for more information. @@ -149,7 +149,14 @@ name, if it exists. If case_sensitive, you will need to provide the correct case of the short file name. -errors=opt What to do when critical file system errors are found. +disable_sparse= If disable_sparse is specified, creation of sparse + regions, i.e. holes, inside files is disabled for the + volume (for the duration of this mount only). By + default, creation of sparse regions is enabled, which + is consistent with the behaviour of traditional Unix + filesystems. + +errors=opt What to do when critical filesystem errors are found. Following values can be used for "opt": continue: DEFAULT, try to clean-up as much as possible, e.g. marking a corrupt inode as Index: fs/ntfs/ChangeLog =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/ChangeLog (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/ChangeLog (mode:100644) @@ -2,20 +2,18 @@ - Find and fix bugs. - Checkpoint or disable the user space journal ($UsnJrnl). - In between ntfs_prepare/commit_write, need exclusion between - simultaneous file extensions. Need perhaps an NInoResizeUnderway() - flag which we can set in ntfs_prepare_write() and clear again in - ntfs_commit_write(). Just have to be careful in readpage/writepage, - as well as in truncate, that we play nice... We might need to have - a data_size field in the ntfs_inode to store the real attribute - length. Also need to be careful with initialized_size extention in + simultaneous file extensions. This is given to us by holding i_sem + on the inode. The only places in the kernel when a file is resized + are prepare/commit write and truncate for both of which i_sem is + held. Just have to be careful in readpage/writepage and all other + helpers not running under i_sem that we play nice... + Also need to be careful with initialized_size extention in ntfs_prepare_write. Basically, just be _very_ careful in this code... - OTOH, perhaps i_sem, which is held accross generic_file_write is - sufficient for synchronisation here. We then just need to make sure - ntfs_readpage/writepage/truncate interoperate properly with us. - UPDATE: The above is all ok as it is due to i_sem held. The only - thing that needs to be checked is ntfs_writepage() which does not - hold i_sem. It cannot change i_size but it needs to cope with a - concurrent i_size change. + UPDATE: The only things that need to be checked are read/writepage + which do not hold i_sem. Note writepage cannot change i_size but it + needs to cope with a concurrent i_size change, just like readpage. + Also both need to cope with concurrent changes to the other sizes, + i.e. initialized/allocated/compressed size, as well. - Implement mft.c::sync_mft_mirror_umount(). We currently will just leave the volume dirty on umount if the final iput(vol->mft_ino) causes a write of any mirrored mft records due to the mft mirror @@ -31,6 +29,92 @@ compiled without debug. This avoids a possible denial of service attack. Thanks to Carl-Daniel Hailfinger from SuSE for pointing this out. + - Fix compilation warnings on ia64. (Randy Dunlap) + - Use i_size_read() in fs/ntfs/attrib.c::ntfs_attr_set(). + - Use i_size_read() in fs/ntfs/logfile.c::ntfs_{check,empty}_logfile(). + - Use i_size_read() once and then use the cached value in + fs/ntfs/lcnalloc.c::ntfs_cluster_alloc(). + - Use i_size_read() in fs/ntfs/file.c::ntfs_file_open(). + - Add size_lock to the ntfs_inode structure. This is an rw spinlock + and it locks against access to the inode sizes. Note, ->size_lock + is also accessed from irq context so you must use the _irqsave and + _irqrestore lock and unlock functions, respectively. + - Use i_size_read() in fs/ntfs/compress.c at the start of the read and + use the cached value afterwards. Cache the initialized_size in the + same way and protect access to the two sizes using the size_lock. + - Use i_size_read() in fs/ntfs/dir.c once and then use the cached + value afterwards. + - Use i_size_read() in fs/ntfs/super.c once and then use the cached + value afterwards. Cache the initialized_size in the same way and + protect access to the two sizes using the size_lock. + - Minor optimization to fs/ntfs/super.c::ntfs_statfs() and its helpers. + - Use i_size_read() in fs/ntfs/inode.c once and then use the cached + value afterwards when reading the size of the bitmap inode. + - Use i_size_{read,write}() in fs/ntfs/{aops.c,mft.c} and protect + access to the i_size and other size fields using the size_lock. + - Implement extension of resident files in the regular file write code + paths (fs/ntfs/aops.c::ntfs_{prepare,commit}_write()). At present + this only works until the data attribute becomes too big for the mft + record after which we abort the write returning -EOPNOTSUPP from + ntfs_prepare_write(). + - Add disable_sparse mount option together with a per volume sparse + enable bit which is set appropriately and a per inode sparse disable + bit which is preset on some system file inodes as appropriate. + - Enforce that sparse support is disabled on NTFS volumes pre 3.0. + - Fix a bug in fs/ntfs/runlist.c::ntfs_mapping_pairs_decompress() in + the creation of the unmapped runlist element for the base attribute + extent. + - Split ntfs_map_runlist() into ntfs_map_runlist() and a non-locking + helper ntfs_map_runlist_nolock() which is used by ntfs_map_runlist(). + This allows us to map runlist fragments with the runlist lock already + held without having to drop and reacquire it around the call. Adapt + all callers. + - Change ntfs_find_vcn() to ntfs_find_vcn_nolock() which takes a locked + runlist. This allows us to find runlist elements with the runlist + lock already held without having to drop and reacquire it around the + call. Adapt all callers. + - Change time to u64 in time.h::ntfs2utc() as it otherwise generates a + warning in the do_div() call on sparc32. Thanks to Meelis Roos for + the report and analysis of the warning. + - Fix a nasty runlist merge bug when merging two holes. + - Set the ntfs_inode->allocated_size to the real allocated size in the + mft record for resident attributes (fs/ntfs/inode.c). + - Small readability cleanup to use "a" instead of "ctx->attr" + everywhere (fs/ntfs/inode.c). + - Make fs/ntfs/namei.c::ntfs_get_{parent,dentry} static and move the + definition of ntfs_export_ops from fs/ntfs/super.c to namei.c. Also, + declare ntfs_export_ops in fs/ntfs/ntfs.h. + - Correct sparse file handling. The compressed values need to be + checked and set in the ntfs inode as done for compressed files and + the compressed size needs to be used for vfs inode->i_blocks instead + of the allocated size, again, as done for compressed files. + - Add AT_EA in addition to AT_DATA to whitelist for being allowed to be + non-resident in fs/ntfs/attrib.c::ntfs_attr_can_be_non_resident(). + - Add fs/ntfs/attrib.c::ntfs_attr_vcn_to_lcn_nolock() used by the new + write code. + - Fix bug in fs/ntfs/attrib.c::ntfs_find_vcn_nolock() where after + dropping the read lock and taking the write lock we were not checking + whether someone else did not already do the work we wanted to do. + - Rename fs/ntfs/attrib.c::ntfs_find_vcn_nolock() to + ntfs_attr_find_vcn_nolock() and update all callers. + - Add fs/ntfs/attrib.[hc]::ntfs_attr_make_non_resident(). + - Fix sign of various error return values to be negative in + fs/ntfs/lcnalloc.c. + - Modify ->readpage and ->writepage (fs/ntfs/aops.c) so they detect and + handle the case where an attribute is converted from resident to + non-resident by a concurrent file write. + - Remove checks for NULL before calling kfree() since kfree() does the + checking itself. (Jesper Juhl) + - Some utilities modify the boot sector but do not update the checksum. + Thus, relax the checking in fs/ntfs/super.c::is_boot_sector_ntfs() to + only emit a warning when the checksum is incorrect rather than + refusing the mount. Thanks to Bernd Casimir for pointing this + problem out. + - Update attribute definition handling. + - Add NTFS_MAX_CLUSTER_SIZE and NTFS_MAX_PAGES_PER_CLUSTER constants. + - Use NTFS_MAX_CLUSTER_SIZE in super.c instead of hard coding 0x10000. + - Use MAX_BUF_PER_PAGE instead of variable sized array allocation for + better code generation and one less sparse warning in fs/ntfs/aops.c. 2.1.22 - Many bug and race fixes and error handling improvements. @@ -1037,7 +1121,7 @@ - Further runlist merging work. (Richard Russon) - Backwards compatibility for gcc-2.95. (Richard Russon) - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. - - Convert to new file system declaration using ->ntfs_get_sb() and + - Convert to new filesystem declaration using ->ntfs_get_sb() and replacing ntfs_read_super() with ntfs_fill_super(). - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index overflow on 32-bit architectures. @@ -1333,7 +1417,7 @@ The driver is now actually useful! Yey. (-: It undoubtedly has got bugs though and it doesn't implement accesssing compressed files yet. Also, accessing files with attribute list attributes is not implemented yet - either. But for small or simple file systems it should work and allow + either. But for small or simple filesystems it should work and allow you to list directories, use stat on directory entries and the file system, open, read, mmap and llseek around in files. A big mile stone has been reached! @@ -1341,7 +1425,7 @@ tng-0.0.0 - Initial version tag. Initial driver implementation. The driver can mount and umount simple - NTFS file systems (i.e. ones without attribute lists in the system + NTFS filesystems (i.e. ones without attribute lists in the system files). If the mount fails there might be problems in the error handling code paths, so be warned. Otherwise it seems to be loading the system files nicely and the mft record read mapping/unmapping seems to be Index: fs/ntfs/Makefile =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/Makefile (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/Makefile (mode:100644) @@ -6,7 +6,7 @@ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.22\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.23-WIP\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG Index: fs/ntfs/aops.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/aops.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/aops.c (mode:100644) @@ -2,7 +2,7 @@ * aops.c - NTFS kernel address space operations and page cache handling. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -66,19 +66,22 @@ ni = NTFS_I(page->mapping->host); if (likely(uptodate)) { - s64 file_ofs; + s64 file_ofs, initialized_size; set_buffer_uptodate(bh); file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); + read_lock_irqsave(&ni->size_lock, flags); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); /* Check for the current buffer head overflowing. */ - if (file_ofs + bh->b_size > ni->initialized_size) { + if (file_ofs + bh->b_size > initialized_size) { char *addr; int ofs = 0; - if (file_ofs < ni->initialized_size) - ofs = ni->initialized_size - file_ofs; + if (file_ofs < initialized_size) + ofs = initialized_size - file_ofs; addr = kmap_atomic(page, KM_BIO_SRC_IRQ); memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); flush_dcache_page(page); @@ -132,7 +135,7 @@ i * rec_size), rec_size); flush_dcache_page(page); kunmap_atomic(addr, KM_BIO_SRC_IRQ); - if (likely(!PageError(page) && page_uptodate)) + if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } unlock_page(page); @@ -168,6 +171,7 @@ runlist_element *rl; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; sector_t iblock, lblock, zblock; + unsigned long flags; unsigned int blocksize, vcn_ofs; int i, nr; unsigned char blocksize_bits; @@ -190,8 +194,10 @@ } iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + read_lock_irqsave(&ni->size_lock, flags); lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; + read_unlock_irqrestore(&ni->size_lock, flags); /* Loop through all the buffers in the page. */ rl = NULL; @@ -341,14 +347,15 @@ */ static int ntfs_readpage(struct file *file, struct page *page) { - loff_t i_size; ntfs_inode *ni, *base_ni; u8 *kaddr; ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; + unsigned long flags; u32 attr_len; int err = 0; +retry_readpage: BUG_ON(!PageLocked(page)); /* * This can potentially happen because we clear PageUptodate() during @@ -383,9 +390,9 @@ * Attribute is resident, implying it is not compressed or encrypted. * This also means the attribute is smaller than an mft record and * hence smaller than a page, so can simply zero out any pages with - * index above 0. We can also do this if the file size is 0. + * index above 0. */ - if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) { + if (unlikely(page->index > 0)) { kaddr = kmap_atomic(page, KM_USER0); memset(kaddr, 0, PAGE_CACHE_SIZE); flush_dcache_page(page); @@ -402,6 +409,14 @@ err = PTR_ERR(mrec); goto err_out; } + /* + * If a parallel write made the attribute non-resident, drop the mft + * record and retry the readpage. + */ + if (unlikely(NInoNonResident(ni))) { + unmap_mft_record(base_ni); + goto retry_readpage; + } ctx = ntfs_attr_get_search_ctx(base_ni, mrec); if (unlikely(!ctx)) { err = -ENOMEM; @@ -412,9 +427,10 @@ if (unlikely(err)) goto put_unm_err_out; attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); - i_size = i_size_read(VFS_I(ni)); - if (unlikely(attr_len > i_size)) - attr_len = i_size; + read_lock_irqsave(&ni->size_lock, flags); + if (unlikely(attr_len > ni->initialized_size)) + attr_len = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); kaddr = kmap_atomic(page, KM_USER0); /* Copy the data to the page. */ memcpy(kaddr, (u8*)ctx->attr + @@ -463,12 +479,15 @@ { VCN vcn; LCN lcn; + s64 initialized_size; + loff_t i_size; sector_t block, dblock, iblock; struct inode *vi; ntfs_inode *ni; ntfs_volume *vol; runlist_element *rl; struct buffer_head *bh, *head; + unsigned long flags; unsigned int blocksize, vcn_ofs; int err; BOOL need_end_writeback; @@ -510,11 +529,16 @@ /* The first block in the page. */ block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + read_lock_irqsave(&ni->size_lock, flags); + i_size = i_size_read(vi); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + /* The first out of bounds block for the data size. */ - dblock = (vi->i_size + blocksize - 1) >> blocksize_bits; + dblock = (i_size + blocksize - 1) >> blocksize_bits; /* The last (fully or partially) initialized block. */ - iblock = ni->initialized_size >> blocksize_bits; + iblock = initialized_size >> blocksize_bits; /* * Be very careful. We have no exclusion from __set_page_dirty_buffers @@ -559,7 +583,7 @@ /* Make sure we have enough initialized size. */ if (unlikely((block >= iblock) && - (ni->initialized_size < vi->i_size))) { + (initialized_size < i_size))) { /* * If this page is fully outside initialized size, zero * out all pages between the current initialized size @@ -801,17 +825,15 @@ ntfs_inode *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; u8 *kaddr; - unsigned char bh_size_bits = vi->i_blkbits; - unsigned int bh_size = 1 << bh_size_bits; unsigned int rec_size = ni->itype.index.block_size; ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size]; struct buffer_head *bh, *head, *tbh, *rec_start_bh; - int max_bhs = PAGE_CACHE_SIZE / bh_size; - struct buffer_head *bhs[max_bhs]; + struct buffer_head *bhs[MAX_BUF_PER_PAGE]; runlist_element *rl; - int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2; - unsigned rec_size_bits; + int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2; + unsigned bh_size, rec_size_bits; BOOL sync, is_mft, page_is_dirty, rec_is_dirty; + unsigned char bh_size_bits; ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index " "0x%lx.", vi->i_ino, ni->type, page->index); @@ -826,7 +848,11 @@ */ BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) || (NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION))); + bh_size_bits = vi->i_blkbits; + bh_size = 1 << bh_size_bits; + max_bhs = PAGE_CACHE_SIZE / bh_size; BUG_ON(!max_bhs); + BUG_ON(max_bhs > MAX_BUF_PER_PAGE); /* Were we called for sync purposes? */ sync = (wbc->sync_mode == WB_SYNC_ALL); @@ -846,7 +872,7 @@ (PAGE_CACHE_SHIFT - bh_size_bits); /* The first out of bounds block for the data size. */ - dblock = (vi->i_size + bh_size - 1) >> bh_size_bits; + dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits; rl = NULL; err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0; @@ -858,6 +884,7 @@ if (likely(block < rec_block)) { if (unlikely(block >= dblock)) { clear_buffer_dirty(bh); + set_buffer_uptodate(bh); continue; } /* @@ -949,7 +976,8 @@ "attribute type 0x%x) because " "its location on disk could " "not be determined (error " - "code %lli).", (s64)block << + "code %lli).", + (long long)block << bh_size_bits >> vol->mft_record_size_bits, ni->mft_no, ni->type, @@ -1223,19 +1251,17 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc) { loff_t i_size; - struct inode *vi; - ntfs_inode *ni, *base_ni; + struct inode *vi = page->mapping->host; + ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); char *kaddr; - ntfs_attr_search_ctx *ctx; - MFT_RECORD *m; + ntfs_attr_search_ctx *ctx = NULL; + MFT_RECORD *m = NULL; u32 attr_len; int err; +retry_writepage: BUG_ON(!PageLocked(page)); - - vi = page->mapping->host; i_size = i_size_read(vi); - /* Is the page fully outside i_size? (truncate in progress) */ if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)) { @@ -1248,8 +1274,6 @@ ntfs_debug("Write outside i_size - truncated?"); return 0; } - ni = NTFS_I(vi); - /* NInoNonResident() == NInoIndexAllocPresent() */ if (NInoNonResident(ni)) { /* @@ -1326,6 +1350,14 @@ ctx = NULL; goto err_out; } + /* + * If a parallel write made the attribute non-resident, drop the mft + * record and retry the writepage. + */ + if (unlikely(NInoNonResident(ni))) { + unmap_mft_record(base_ni); + goto retry_writepage; + } ctx = ntfs_attr_get_search_ctx(base_ni, m); if (unlikely(!ctx)) { err = -ENOMEM; @@ -1367,15 +1399,12 @@ */ attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); - i_size = i_size_read(VFS_I(ni)); - kaddr = kmap_atomic(page, KM_USER0); + i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { - /* Zero out of bounds area in the mft record. */ - memset((u8*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset) + - i_size, 0, attr_len - i_size); attr_len = i_size; + ctx->attr->data.resident.value_length = cpu_to_le32(attr_len); } + kaddr = kmap_atomic(page, KM_USER0); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), @@ -1405,8 +1434,10 @@ err = 0; } else { ntfs_error(vi->i_sb, "Resident attribute write failed with " - "error %i. Setting page error flag.", err); + "error %i.", err); SetPageError(page); + NVolSetErrors(ni->vol); + make_bad_inode(vi); } unlock_page(page); if (ctx) @@ -1425,12 +1456,15 @@ { VCN vcn; LCN lcn; + s64 initialized_size; + loff_t i_size; sector_t block, ablock, iblock; struct inode *vi; ntfs_inode *ni; ntfs_volume *vol; runlist_element *rl; struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; + unsigned long flags; unsigned int vcn_ofs, block_start, block_end, blocksize; int err; BOOL is_retry; @@ -1462,16 +1496,20 @@ /* The first block in the page. */ block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + read_lock_irqsave(&ni->size_lock, flags); /* - * The first out of bounds block for the allocated size. No need to + * The first out of bounds block for the allocated size. No need to * round up as allocated_size is in multiples of cluster size and the * minimum cluster size is 512 bytes, which is equal to the smallest * blocksize. */ ablock = ni->allocated_size >> blocksize_bits; + i_size = i_size_read(vi); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); /* The last (fully or partially) initialized block. */ - iblock = ni->initialized_size >> blocksize_bits; + iblock = initialized_size >> blocksize_bits; /* Loop through all the buffers in the page. */ block_start = 0; @@ -1518,7 +1556,7 @@ * request, i.e. block < ablock is true. */ if (unlikely((block >= iblock) && - (ni->initialized_size < vi->i_size))) { + (initialized_size < i_size))) { /* * If this page is fully outside initialized size, zero * out all pages between the current initialized size @@ -1797,6 +1835,7 @@ unsigned from, unsigned to) { s64 new_size; + loff_t i_size; struct inode *vi = page->mapping->host; ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; @@ -1868,14 +1907,8 @@ BUG_ON(page_has_buffers(page)); new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to; /* If we do not need to resize the attribute allocation we are done. */ - if (new_size <= vi->i_size) + if (new_size <= i_size_read(vi)) goto done; - - // FIXME: We abort for now as this code is not safe. - ntfs_error(vi->i_sb, "Changing the file size is not supported yet. " - "Sorry."); - return -EOPNOTSUPP; - /* Map, pin, and lock the (base) mft record. */ if (!NInoAttr(ni)) base_ni = ni; @@ -1904,7 +1937,15 @@ a = ctx->attr; /* The total length of the attribute value. */ attr_len = le32_to_cpu(a->data.resident.value_length); - BUG_ON(vi->i_size != attr_len); + /* Fix an eventual previous failure of ntfs_commit_write(). */ + i_size = i_size_read(vi); + if (unlikely(attr_len > i_size)) { + attr_len = i_size; + a->data.resident.value_length = cpu_to_le32(attr_len); + } + /* If we do not need to resize the attribute allocation we are done. */ + if (new_size <= attr_len) + goto done_unm; /* Check if new size is allowed in $AttrDef. */ err = ntfs_attr_size_bounds_check(vol, ni->type, new_size); if (unlikely(err)) { @@ -1962,6 +2003,7 @@ } flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); +done_unm: ntfs_attr_put_search_ctx(ctx); unmap_mft_record(base_ni); /* @@ -2047,7 +2089,7 @@ * now we know ntfs_prepare_write() would have failed in the write * exceeds i_size case, so this will never trigger which is fine. */ - if (pos > vi->i_size) { + if (pos > i_size_read(vi)) { ntfs_error(vi->i_sb, "Writing beyond the existing file size is " "not supported yet. Sorry."); return -EOPNOTSUPP; @@ -2183,9 +2225,13 @@ } kunmap_atomic(kaddr, KM_USER0); /* Update i_size if necessary. */ - if (vi->i_size < attr_len) { + if (i_size_read(vi) < attr_len) { + unsigned long flags; + + write_lock_irqsave(&ni->size_lock, flags); ni->allocated_size = ni->initialized_size = attr_len; i_size_write(vi, attr_len); + write_unlock_irqrestore(&ni->size_lock, flags); } /* Mark the mft record dirty, so it gets written back. */ flush_dcache_mft_record_page(ctx->ntfs_ino); Index: fs/ntfs/attrib.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/attrib.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/attrib.c (mode:100644) @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -21,16 +21,19 @@ */ #include +#include #include "attrib.h" #include "debug.h" #include "layout.h" +#include "lcnalloc.h" +#include "malloc.h" #include "mft.h" #include "ntfs.h" #include "types.h" /** - * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode + * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode * @ni: ntfs inode for which to map (part of) a runlist * @vcn: map runlist part containing this vcn * @@ -38,24 +41,23 @@ * * Return 0 on success and -errno on error. * - * Locking: - The runlist must be unlocked on entry and is unlocked on return. - * - This function takes the lock for writing and modifies the runlist. + * Locking: - The runlist must be locked for writing. + * - This function modifies the runlist. */ -int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) +int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn) { ntfs_inode *base_ni; - ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; + ntfs_attr_search_ctx *ctx; + runlist_element *rl; int err = 0; ntfs_debug("Mapping runlist part containing vcn 0x%llx.", (unsigned long long)vcn); - if (!NInoAttr(ni)) base_ni = ni; else base_ni = ni->ext.base_ntfs_ino; - mrec = map_mft_record(base_ni); if (IS_ERR(mrec)) return PTR_ERR(mrec); @@ -66,15 +68,7 @@ } err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, vcn, NULL, 0, ctx); - if (unlikely(err)) - goto put_err_out; - - down_write(&ni->runlist.lock); - /* Make sure someone else didn't do the work while we were sleeping. */ - if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= - LCN_RL_NOT_MAPPED)) { - runlist_element *rl; - + if (likely(!err)) { rl = ntfs_mapping_pairs_decompress(ni->vol, ctx->attr, ni->runlist.rl); if (IS_ERR(rl)) @@ -82,9 +76,6 @@ else ni->runlist.rl = rl; } - up_write(&ni->runlist.lock); - -put_err_out: ntfs_attr_put_search_ctx(ctx); err_out: unmap_mft_record(base_ni); @@ -92,17 +83,132 @@ } /** - * ntfs_find_vcn - find a vcn in the runlist described by an ntfs inode - * @ni: ntfs inode describing the runlist to search - * @vcn: vcn to find - * @need_write: if false, lock for reading and if true, lock for writing + * ntfs_map_runlist - map (a part of) a runlist of an ntfs inode + * @ni: ntfs inode for which to map (part of) a runlist + * @vcn: map runlist part containing this vcn + * + * Map the part of a runlist containing the @vcn of the ntfs inode @ni. + * + * Return 0 on success and -errno on error. + * + * Locking: - The runlist must be unlocked on entry and is unlocked on return. + * - This function takes the runlist lock for writing and modifies the + * runlist. + */ +int ntfs_map_runlist(ntfs_inode *ni, VCN vcn) +{ + int err = 0; + + down_write(&ni->runlist.lock); + /* Make sure someone else didn't do the work while we were sleeping. */ + if (likely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) <= + LCN_RL_NOT_MAPPED)) + err = ntfs_map_runlist_nolock(ni, vcn); + up_write(&ni->runlist.lock); + return err; +} + +/** + * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode + * @ni: ntfs inode of the attribute whose runlist to search + * @vcn: vcn to convert + * @write_locked: true if the runlist is locked for writing + * + * Find the virtual cluster number @vcn in the runlist of the ntfs attribute + * described by the ntfs inode @ni and return the corresponding logical cluster + * number (lcn). + * + * If the @vcn is not mapped yet, the attempt is made to map the attribute + * extent containing the @vcn and the vcn to lcn conversion is retried. + * + * If @write_locked is true the caller has locked the runlist for writing and + * if false for reading. + * + * Since lcns must be >= 0, we use negative return codes with special meaning: + * + * Return code Meaning / Description + * ========================================== + * LCN_HOLE Hole / not allocated on disk. + * LCN_ENOENT There is no such vcn in the runlist, i.e. @vcn is out of bounds. + * LCN_ENOMEM Not enough memory to map runlist. + * LCN_EIO Critical error (runlist/file is corrupt, i/o error, etc). + * + * Locking: - The runlist must be locked on entry and is left locked on return. + * - If @write_locked is FALSE, i.e. the runlist is locked for reading, + * the lock may be dropped inside the function so you cannot rely on + * the runlist still being the same when this function returns. + */ +LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, + const BOOL write_locked) +{ + LCN lcn; + BOOL is_retry = FALSE; + + ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", + ni->mft_no, (unsigned long long)vcn, + write_locked ? "write" : "read"); + BUG_ON(!ni); + BUG_ON(!NInoNonResident(ni)); + BUG_ON(vcn < 0); +retry_remap: + /* Convert vcn to lcn. If that fails map the runlist and retry once. */ + lcn = ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn); + if (likely(lcn >= LCN_HOLE)) { + ntfs_debug("Done, lcn 0x%llx.", (long long)lcn); + return lcn; + } + if (lcn != LCN_RL_NOT_MAPPED) { + if (lcn != LCN_ENOENT) + lcn = LCN_EIO; + } else if (!is_retry) { + int err; + + if (!write_locked) { + up_read(&ni->runlist.lock); + down_write(&ni->runlist.lock); + if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) != + LCN_RL_NOT_MAPPED)) { + up_write(&ni->runlist.lock); + down_read(&ni->runlist.lock); + goto retry_remap; + } + } + err = ntfs_map_runlist_nolock(ni, vcn); + if (!write_locked) { + up_write(&ni->runlist.lock); + down_read(&ni->runlist.lock); + } + if (likely(!err)) { + is_retry = TRUE; + goto retry_remap; + } + if (err == -ENOENT) + lcn = LCN_ENOENT; + else if (err == -ENOMEM) + lcn = LCN_ENOMEM; + else + lcn = LCN_EIO; + } + if (lcn != LCN_ENOENT) + ntfs_error(ni->vol->sb, "Failed with error code %lli.", + (long long)lcn); + return lcn; +} + +/** + * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode + * @ni: ntfs inode describing the runlist to search + * @vcn: vcn to find + * @write_locked: true if the runlist is locked for writing * * Find the virtual cluster number @vcn in the runlist described by the ntfs * inode @ni and return the address of the runlist element containing the @vcn. - * The runlist is left locked and the caller has to unlock it. If @need_write - * is true, the runlist is locked for writing and if @need_write is false, the - * runlist is locked for reading. In the error case, the runlist is not left - * locked. + * + * If the @vcn is not mapped yet, the attempt is made to map the attribute + * extent containing the @vcn and the vcn to lcn conversion is retried. + * + * If @write_locked is true the caller has locked the runlist for writing and + * if false for reading. * * Note you need to distinguish between the lcn of the returned runlist element * being >= 0 and LCN_HOLE. In the later case you have to return zeroes on @@ -118,34 +224,29 @@ * -ENOMEM - Not enough memory to map runlist. * -EIO - Critical error (runlist/file is corrupt, i/o error, etc). * - * Locking: - The runlist must be unlocked on entry. - * - On failing return, the runlist is unlocked. - * - On successful return, the runlist is locked. If @need_write us - * true, it is locked for writing. Otherwise is is locked for - * reading. + * Locking: - The runlist must be locked on entry and is left locked on return. + * - If @write_locked is FALSE, i.e. the runlist is locked for reading, + * the lock may be dropped inside the function so you cannot rely on + * the runlist still being the same when this function returns. */ -runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, - const BOOL need_write) +runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn, + const BOOL write_locked) { runlist_element *rl; int err = 0; BOOL is_retry = FALSE; - ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, lock for %sing.", + ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.", ni->mft_no, (unsigned long long)vcn, - !need_write ? "read" : "writ"); + write_locked ? "write" : "read"); BUG_ON(!ni); BUG_ON(!NInoNonResident(ni)); BUG_ON(vcn < 0); -lock_retry_remap: - if (!need_write) - down_read(&ni->runlist.lock); - else - down_write(&ni->runlist.lock); +retry_remap: rl = ni->runlist.rl; if (likely(rl && vcn >= rl[0].vcn)) { while (likely(rl->length)) { - if (likely(vcn < rl[1].vcn)) { + if (unlikely(vcn < rl[1].vcn)) { if (likely(rl->lcn >= LCN_HOLE)) { ntfs_debug("Done."); return rl; @@ -161,19 +262,29 @@ err = -EIO; } } - if (!need_write) - up_read(&ni->runlist.lock); - else - up_write(&ni->runlist.lock); if (!err && !is_retry) { /* * The @vcn is in an unmapped region, map the runlist and * retry. */ - err = ntfs_map_runlist(ni, vcn); + if (!write_locked) { + up_read(&ni->runlist.lock); + down_write(&ni->runlist.lock); + if (unlikely(ntfs_rl_vcn_to_lcn(ni->runlist.rl, vcn) != + LCN_RL_NOT_MAPPED)) { + up_write(&ni->runlist.lock); + down_read(&ni->runlist.lock); + goto retry_remap; + } + } + err = ntfs_map_runlist_nolock(ni, vcn); + if (!write_locked) { + up_write(&ni->runlist.lock); + down_read(&ni->runlist.lock); + } if (likely(!err)) { is_retry = TRUE; - goto lock_retry_remap; + goto retry_remap; } /* * -EINVAL and -ENOENT coming from a failed mapping attempt are @@ -184,7 +295,8 @@ err = -EIO; } else if (!err) err = -EIO; - ntfs_error(ni->vol->sb, "Failed with error code %i.", err); + if (err != -ENOENT) + ntfs_error(ni->vol->sb, "Failed with error code %i.", err); return ERR_PTR(err); } @@ -945,6 +1057,8 @@ return; } +#ifdef NTFS_RW + /** * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file * @vol: ntfs volume to which the attribute belongs @@ -1024,27 +1138,21 @@ * Check whether the attribute of @type on the ntfs volume @vol is allowed to * be non-resident. This information is obtained from $AttrDef system file. * - * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, or + * Return 0 if the attribute is allowed to be non-resident, -EPERM if not, and * -ENOENT if the attribute is not listed in $AttrDef. */ int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, const ATTR_TYPE type) { ATTR_DEF *ad; - /* - * $DATA is always allowed to be non-resident even if $AttrDef does not - * specify this in the flags of the $DATA attribute definition record. - */ - if (type == AT_DATA) - return 0; /* Find the attribute definition record in $AttrDef. */ ad = ntfs_attr_find_in_attrdef(vol, type); if (unlikely(!ad)) return -ENOENT; /* Check the flags and return the result. */ - if (ad->flags & CAN_BE_NON_RESIDENT) - return 0; - return -EPERM; + if (ad->flags & ATTR_DEF_RESIDENT) + return -EPERM; + return 0; } /** @@ -1067,9 +1175,9 @@ */ int ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type) { - if (type != AT_INDEX_ALLOCATION && type != AT_EA) - return 0; - return -EPERM; + if (type == AT_INDEX_ALLOCATION || type == AT_EA) + return -EPERM; + return 0; } /** @@ -1117,6 +1225,320 @@ } /** + * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute + * @ni: ntfs inode describing the attribute to convert + * + * Convert the resident ntfs attribute described by the ntfs inode @ni to a + * non-resident one. + * + * Return 0 on success and -errno on error. The following error return codes + * are defined: + * -EPERM - The attribute is not allowed to be non-resident. + * -ENOMEM - Not enough memory. + * -ENOSPC - Not enough disk space. + * -EINVAL - Attribute not defined on the volume. + * -EIO - I/o error or other error. + * Note that -ENOSPC is also returned in the case that there is not enough + * space in the mft record to do the conversion. This can happen when the mft + * record is already very full. The caller is responsible for trying to make + * space in the mft record and trying again. FIXME: Do we need a separate + * error return code for this kind of -ENOSPC or is it always worth trying + * again in case the attribute may then fit in a resident state so no need to + * make it non-resident at all? Ho-hum... (AIA) + * + * NOTE to self: No changes in the attribute list are required to move from + * a resident to a non-resident attribute. + * + * Locking: - The caller must hold i_sem on the inode. + */ +int ntfs_attr_make_non_resident(ntfs_inode *ni) +{ + s64 new_size; + struct inode *vi = VFS_I(ni); + ntfs_volume *vol = ni->vol; + ntfs_inode *base_ni; + MFT_RECORD *m; + ATTR_RECORD *a; + ntfs_attr_search_ctx *ctx; + struct page *page; + runlist_element *rl; + u8 *kaddr; + unsigned long flags; + int mp_size, mp_ofs, name_ofs, arec_size, err, err2; + u32 attr_size; + u8 old_res_attr_flags; + + /* Check that the attribute is allowed to be non-resident. */ + err = ntfs_attr_can_be_non_resident(vol, ni->type); + if (unlikely(err)) { + if (err == -EPERM) + ntfs_debug("Attribute is not allowed to be " + "non-resident."); + else + ntfs_debug("Attribute not defined on the NTFS " + "volume!"); + return err; + } + /* + * The size needs to be aligned to a cluster boundary for allocation + * purposes. + */ + new_size = (i_size_read(vi) + vol->cluster_size - 1) & + ~(vol->cluster_size - 1); + if (new_size > 0) { + /* + * Will need the page later and since the page lock nests + * outside all ntfs locks, we need to get the page now. + */ + page = find_or_create_page(vi->i_mapping, 0, + mapping_gfp_mask(vi->i_mapping)); + if (unlikely(!page)) + return -ENOMEM; + /* Start by allocating clusters to hold the attribute value. */ + rl = ntfs_cluster_alloc(vol, 0, new_size >> + vol->cluster_size_bits, -1, DATA_ZONE); + if (IS_ERR(rl)) { + err = PTR_ERR(rl); + ntfs_debug("Failed to allocate cluster%s, error code " + "%i.\n", (new_size >> + vol->cluster_size_bits) > 1 ? "s" : "", + err); + goto page_err_out; + } + } else { + rl = NULL; + page = NULL; + } + /* Determine the size of the mapping pairs array. */ + mp_size = ntfs_get_size_for_mapping_pairs(vol, rl, 0); + if (unlikely(mp_size < 0)) { + err = mp_size; + ntfs_debug("Failed to get size for mapping pairs array, error " + "code %i.", err); + goto rl_err_out; + } + down_write(&ni->runlist.lock); + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = ntfs_attr_get_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, + CASE_SENSITIVE, 0, NULL, 0, ctx); + if (unlikely(err)) { + if (err == -ENOENT) + err = -EIO; + goto err_out; + } + m = ctx->mrec; + a = ctx->attr; + BUG_ON(NInoNonResident(ni)); + BUG_ON(a->non_resident); + /* + * Calculate new offsets for the name and the mapping pairs array. + * We assume the attribute is not compressed or sparse. + */ + name_ofs = (offsetof(ATTR_REC, + data.non_resident.compressed_size) + 7) & ~7; + mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; + /* + * Determine the size of the resident part of the now non-resident + * attribute record. + */ + arec_size = (mp_ofs + mp_size + 7) & ~7; + /* + * If the page is not uptodate bring it uptodate by copying from the + * attribute value. + */ + attr_size = le32_to_cpu(a->data.resident.value_length); + BUG_ON(attr_size != i_size_read(vi)); + if (page && !PageUptodate(page)) { + kaddr = kmap_atomic(page, KM_USER0); + memcpy(kaddr, (u8*)a + + le16_to_cpu(a->data.resident.value_offset), + attr_size); + memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + SetPageUptodate(page); + } + /* Backup the attribute flag. */ + old_res_attr_flags = a->data.resident.flags; + /* Resize the resident part of the attribute record. */ + err = ntfs_attr_record_resize(m, a, arec_size); + if (unlikely(err)) + goto err_out; + /* + * Convert the resident part of the attribute record to describe a + * non-resident attribute. + */ + a->non_resident = 1; + /* Move the attribute name if it exists and update the offset. */ + if (a->name_length) + memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), + a->name_length * sizeof(ntfschar)); + a->name_offset = cpu_to_le16(name_ofs); + /* + * FIXME: For now just clear all of these as we do not support them + * when writing. + */ + a->flags &= cpu_to_le16(0xffff & ~le16_to_cpu(ATTR_IS_SPARSE | + ATTR_IS_ENCRYPTED | ATTR_COMPRESSION_MASK)); + /* Setup the fields specific to non-resident attributes. */ + a->data.non_resident.lowest_vcn = 0; + a->data.non_resident.highest_vcn = cpu_to_sle64((new_size - 1) >> + vol->cluster_size_bits); + a->data.non_resident.mapping_pairs_offset = cpu_to_le16(mp_ofs); + a->data.non_resident.compression_unit = 0; + memset(&a->data.non_resident.reserved, 0, + sizeof(a->data.non_resident.reserved)); + a->data.non_resident.allocated_size = cpu_to_sle64(new_size); + a->data.non_resident.data_size = + a->data.non_resident.initialized_size = + cpu_to_sle64(attr_size); + /* Generate the mapping pairs array into the attribute record. */ + err = ntfs_mapping_pairs_build(vol, (u8*)a + mp_ofs, + arec_size - mp_ofs, rl, 0, NULL); + if (unlikely(err)) { + ntfs_debug("Failed to build mapping pairs, error code %i.", + err); + goto undo_err_out; + } + /* Setup the in-memory attribute structure to be non-resident. */ + /* + * FIXME: For now just clear all of these as we do not support them + * when writing. + */ + NInoClearSparse(ni); + NInoClearEncrypted(ni); + NInoClearCompressed(ni); + ni->runlist.rl = rl; + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = new_size; + write_unlock_irqrestore(&ni->size_lock, flags); + /* + * This needs to be last since the address space operations ->readpage + * and ->writepage can run concurrently with us as they are not + * serialized on i_sem. Note, we are not allowed to fail once we flip + * this switch, which is another reason to do this last. + */ + NInoSetNonResident(ni); + /* Mark the mft record dirty, so it gets written back. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); + ntfs_attr_put_search_ctx(ctx); + unmap_mft_record(base_ni); + up_write(&ni->runlist.lock); + if (page) { + set_page_dirty(page); + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + } + ntfs_debug("Done."); + return 0; +undo_err_out: + /* Convert the attribute back into a resident attribute. */ + a->non_resident = 0; + /* Move the attribute name if it exists and update the offset. */ + name_ofs = (offsetof(ATTR_RECORD, data.resident.reserved) + + sizeof(a->data.resident.reserved) + 7) & ~7; + if (a->name_length) + memmove((u8*)a + name_ofs, (u8*)a + le16_to_cpu(a->name_offset), + a->name_length * sizeof(ntfschar)); + mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7; + a->name_offset = cpu_to_le16(name_ofs); + arec_size = (mp_ofs + attr_size + 7) & ~7; + /* Resize the resident part of the attribute record. */ + err2 = ntfs_attr_record_resize(m, a, arec_size); + if (unlikely(err2)) { + /* + * This cannot happen (well if memory corruption is at work it + * could happen in theory), but deal with it as well as we can. + * If the old size is too small, truncate the attribute, + * otherwise simply give it a larger allocated size. + * FIXME: Should check whether chkdsk complains when the + * allocated size is much bigger than the resident value size. + */ + arec_size = le32_to_cpu(a->length); + if ((mp_ofs + attr_size) > arec_size) { + err2 = attr_size; + attr_size = arec_size - mp_ofs; + ntfs_error(vol->sb, "Failed to undo partial resident " + "to non-resident attribute " + "conversion. Truncating inode 0x%lx, " + "attribute type 0x%x from %i bytes to " + "%i bytes to maintain metadata " + "consistency. THIS MEANS YOU ARE " + "LOSING %i BYTES DATA FROM THIS %s.", + vi->i_ino, + (unsigned)le32_to_cpu(ni->type), + err2, attr_size, err2 - attr_size, + ((ni->type == AT_DATA) && + !ni->name_len) ? "FILE": "ATTRIBUTE"); + write_lock_irqsave(&ni->size_lock, flags); + ni->initialized_size = attr_size; + i_size_write(vi, attr_size); + write_unlock_irqrestore(&ni->size_lock, flags); + } + } + /* Setup the fields specific to resident attributes. */ + a->data.resident.value_length = cpu_to_le32(attr_size); + a->data.resident.value_offset = cpu_to_le16(mp_ofs); + a->data.resident.flags = old_res_attr_flags; + memset(&a->data.resident.reserved, 0, + sizeof(a->data.resident.reserved)); + /* Copy the data from the page back to the attribute value. */ + if (page) { + kaddr = kmap_atomic(page, KM_USER0); + memcpy((u8*)a + mp_ofs, kaddr, attr_size); + kunmap_atomic(kaddr, KM_USER0); + } + /* Setup the allocated size in the ntfs inode in case it changed. */ + write_lock_irqsave(&ni->size_lock, flags); + ni->allocated_size = arec_size - mp_ofs; + write_unlock_irqrestore(&ni->size_lock, flags); + /* Mark the mft record dirty, so it gets written back. */ + flush_dcache_mft_record_page(ctx->ntfs_ino); + mark_mft_record_dirty(ctx->ntfs_ino); +err_out: + if (ctx) + ntfs_attr_put_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + ni->runlist.rl = NULL; + up_write(&ni->runlist.lock); +rl_err_out: + if (rl) { + if (ntfs_cluster_free_from_rl(vol, rl) < 0) { + ntfs_error(vol->sb, "Failed to release allocated " + "cluster(s) in error code path. Run " + "chkdsk to recover the lost " + "cluster(s)."); + NVolSetErrors(vol); + } + ntfs_free(rl); +page_err_out: + unlock_page(page); + page_cache_release(page); + } + if (err == -EINVAL) + err = -EIO; + return err; +} + +/** * ntfs_attr_set - fill (a part of) an attribute with a byte * @ni: ntfs inode describing the attribute to fill * @ofs: offset inside the attribute at which to start to fill @@ -1127,6 +1549,10 @@ * byte offset @ofs inside the attribute with the constant byte @val. * * This function is effectively like memset() applied to an ntfs attribute. + * Note thie function actually only operates on the page cache pages belonging + * to the ntfs attribute and it marks them dirty after doing the memset(). + * Thus it relies on the vm dirty page write code paths to cause the modified + * pages to be written to the mft record/disk. * * Return 0 on success and -errno on error. An error code of -ESPIPE means * that @ofs + @cnt were outside the end of the attribute and no write was @@ -1155,7 +1581,7 @@ end = ofs + cnt; end_ofs = end & ~PAGE_CACHE_MASK; /* If the end is outside the inode size return -ESPIPE. */ - if (unlikely(end > VFS_I(ni)->i_size)) { + if (unlikely(end > i_size_read(VFS_I(ni)))) { ntfs_error(vol->sb, "Request exceeds end of attribute."); return -ESPIPE; } @@ -1256,3 +1682,5 @@ ntfs_debug("Done."); return 0; } + +#endif /* NTFS_RW */ Index: fs/ntfs/attrib.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/attrib.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/attrib.h (mode:100644) @@ -2,7 +2,7 @@ * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -60,10 +60,14 @@ ATTR_RECORD *base_attr; } ntfs_attr_search_ctx; +extern int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn); extern int ntfs_map_runlist(ntfs_inode *ni, VCN vcn); -extern runlist_element *ntfs_find_vcn(ntfs_inode *ni, const VCN vcn, - const BOOL need_write); +extern LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn, + const BOOL write_locked); + +extern runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, + const VCN vcn, const BOOL write_locked); int ntfs_attr_lookup(const ATTR_TYPE type, const ntfschar *name, const u32 name_len, const IGNORE_CASE_BOOL ic, @@ -85,6 +89,8 @@ MFT_RECORD *mrec); extern void ntfs_attr_put_search_ctx(ntfs_attr_search_ctx *ctx); +#ifdef NTFS_RW + extern int ntfs_attr_size_bounds_check(const ntfs_volume *vol, const ATTR_TYPE type, const s64 size); extern int ntfs_attr_can_be_non_resident(const ntfs_volume *vol, @@ -94,7 +100,11 @@ extern int ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size); +extern int ntfs_attr_make_non_resident(ntfs_inode *ni); + extern int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val); +#endif /* NTFS_RW */ + #endif /* _LINUX_NTFS_ATTRIB_H */ Index: fs/ntfs/compress.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/compress.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/compress.c (mode:100644) @@ -96,13 +96,14 @@ /** * zero_partial_compressed_page - zero out of bounds compressed page region */ -static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page) +static void zero_partial_compressed_page(struct page *page, + const s64 initialized_size) { u8 *kp = page_address(page); unsigned int kp_ofs; ntfs_debug("Zeroing page region outside initialized size."); - if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) { + if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) { /* * FIXME: Using clear_page() will become wrong when we get * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. @@ -110,7 +111,7 @@ clear_page(kp); return; } - kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK; + kp_ofs = initialized_size & ~PAGE_CACHE_MASK; memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); return; } @@ -118,12 +119,12 @@ /** * handle_bounds_compressed_page - test for&handle out of bounds compressed page */ -static inline void handle_bounds_compressed_page(ntfs_inode *ni, - struct page *page) +static inline void handle_bounds_compressed_page(struct page *page, + const loff_t i_size, const s64 initialized_size) { - if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) && - (ni->initialized_size < VFS_I(ni)->i_size)) - zero_partial_compressed_page(ni, page); + if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) && + (initialized_size < i_size)) + zero_partial_compressed_page(page, initialized_size); return; } @@ -138,6 +139,8 @@ * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT) * @cb_start: compression block to decompress (IN) * @cb_size: size of compression block @cb_start in bytes (IN) + * @i_size: file size when we started the read (IN) + * @initialized_size: initialized file size when we started the read (IN) * * The caller must have disabled preemption. ntfs_decompress() reenables it when * the critical section is finished. @@ -165,7 +168,8 @@ static int ntfs_decompress(struct page *dest_pages[], int *dest_index, int *dest_ofs, const int dest_max_index, const int dest_max_ofs, const int xpage, char *xpage_done, u8 *const cb_start, - const u32 cb_size) + const u32 cb_size, const loff_t i_size, + const s64 initialized_size) { /* * Pointers into the compressed data, i.e. the compression block (cb), @@ -219,9 +223,6 @@ spin_unlock(&ntfs_cb_lock); /* Second stage: finalize completed pages. */ if (nr_completed_pages > 0) { - struct page *page = dest_pages[completed_pages[0]]; - ntfs_inode *ni = NTFS_I(page->mapping->host); - for (i = 0; i < nr_completed_pages; i++) { int di = completed_pages[i]; @@ -230,7 +231,8 @@ * If we are outside the initialized size, zero * the out of bounds page range. */ - handle_bounds_compressed_page(ni, dp); + handle_bounds_compressed_page(dp, i_size, + initialized_size); flush_dcache_page(dp); kunmap(dp); SetPageUptodate(dp); @@ -478,12 +480,14 @@ */ int ntfs_read_compressed_block(struct page *page) { + loff_t i_size; + s64 initialized_size; struct address_space *mapping = page->mapping; ntfs_inode *ni = NTFS_I(mapping->host); ntfs_volume *vol = ni->vol; struct super_block *sb = vol->sb; runlist_element *rl; - unsigned long block_size = sb->s_blocksize; + unsigned long flags, block_size = sb->s_blocksize; unsigned char block_size_bits = sb->s_blocksize_bits; u8 *cb, *cb_pos, *cb_end; struct buffer_head **bhs; @@ -552,8 +556,12 @@ * The remaining pages need to be allocated and inserted into the page * cache, alignment guarantees keep all the below much simpler. (-8 */ - max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT) - offset; + read_lock_irqsave(&ni->size_lock, flags); + i_size = i_size_read(VFS_I(ni)); + initialized_size = ni->initialized_size; + read_unlock_irqrestore(&ni->size_lock, flags); + max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - + offset; if (nr_pages < max_page) max_page = nr_pages; for (i = 0; i < max_page; i++, offset++) { @@ -824,7 +832,8 @@ * If we are outside the initialized size, zero * the out of bounds page range. */ - handle_bounds_compressed_page(ni, page); + handle_bounds_compressed_page(page, i_size, + initialized_size); flush_dcache_page(page); kunmap(page); SetPageUptodate(page); @@ -847,7 +856,8 @@ ntfs_debug("Found compressed compression block."); err = ntfs_decompress(pages, &cur_page, &cur_ofs, cb_max_page, cb_max_ofs, xpage, &xpage_done, - cb_pos, cb_size - (cb_pos - cb)); + cb_pos, cb_size - (cb_pos - cb), i_size, + initialized_size); /* * We can sleep from now on, lock already dropped by * ntfs_decompress(). Index: fs/ntfs/debug.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/debug.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/debug.c (mode:100644) @@ -164,14 +164,17 @@ if (index > -LCN_ENOENT - 1) index = 3; printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", - (rl + i)->vcn, lcn_str[index], - (rl + i)->length, (rl + i)->length ? - "" : " (runlist end)"); + (long long)(rl + i)->vcn, lcn_str[index], + (long long)(rl + i)->length, + (rl + i)->length ? "" : + " (runlist end)"); } else printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", - (rl + i)->vcn, (rl + i)->lcn, - (rl + i)->length, (rl + i)->length ? - "" : " (runlist end)"); + (long long)(rl + i)->vcn, + (long long)(rl + i)->lcn, + (long long)(rl + i)->length, + (rl + i)->length ? "" : + " (runlist end)"); if (!(rl + i)->length) break; } Index: fs/ntfs/dir.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/dir.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/dir.c (mode:100644) @@ -1,7 +1,7 @@ /** * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -183,8 +183,7 @@ name->len = 0; *res = name; } else { - if (name) - kfree(name); + kfree(name); *res = NULL; } mref = le64_to_cpu(ie->data.dir.indexed_file); @@ -444,8 +443,7 @@ name->len = 0; *res = name; } else { - if (name) - kfree(name); + kfree(name); *res = NULL; } mref = le64_to_cpu(ie->data.dir.indexed_file); @@ -610,7 +608,7 @@ // TODO: (AIA) // The algorithm embedded in this code will be required for the time when we // want to support adding of entries to directories, where we require correct -// collation of file names in order not to cause corruption of the file system. +// collation of file names in order not to cause corruption of the filesystem. /** * ntfs_lookup_inode_by_name - find an inode in a directory given its name @@ -1101,7 +1099,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; - loff_t fpos; + loff_t fpos, i_size; struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; struct super_block *sb = vdir->i_sb; ntfs_inode *ndir = NTFS_I(vdir); @@ -1122,7 +1120,8 @@ vdir->i_ino, fpos); rc = err = 0; /* Are we at end of dir yet? */ - if (fpos >= vdir->i_size + vol->mft_record_size) + i_size = i_size_read(vdir); + if (fpos >= i_size + vol->mft_record_size) goto done; /* Emulate . and .. for all directories. */ if (!fpos) { @@ -1264,7 +1263,7 @@ bmp_mapping = bmp_vi->i_mapping; /* Get the starting bitmap bit position and sanity check it. */ bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; - if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { + if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) { ntfs_error(sb, "Current index allocation position exceeds " "index bitmap size."); goto err_out; @@ -1301,7 +1300,7 @@ goto get_next_bmp_page; } /* If we have reached the end of the bitmap, we are done. */ - if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) + if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size)) goto unm_EOD; ia_pos = (bmp_pos + cur_bmp_pos) << ndir->itype.index.block_size_bits; @@ -1441,7 +1440,7 @@ ntfs_unmap_page(bmp_page); EOD: /* We are finished, set fpos to EOD. */ - fpos = vdir->i_size + vol->mft_record_size; + fpos = i_size + vol->mft_record_size; abort: kfree(name); done: @@ -1461,10 +1460,8 @@ unlock_page(ia_page); ntfs_unmap_page(ia_page); } - if (ir) - kfree(ir); - if (name) - kfree(name); + kfree(ir); + kfree(name); if (ctx) ntfs_attr_put_search_ctx(ctx); if (m) @@ -1495,7 +1492,7 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp) { if (sizeof(unsigned long) < 8) { - if (vi->i_size > MAX_LFS_FILESIZE) + if (i_size_read(vi) > MAX_LFS_FILESIZE) return -EFBIG; } return 0; Index: fs/ntfs/file.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/file.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/file.c (mode:100644) @@ -47,7 +47,7 @@ static int ntfs_file_open(struct inode *vi, struct file *filp) { if (sizeof(unsigned long) < 8) { - if (vi->i_size > MAX_LFS_FILESIZE) + if (i_size_read(vi) > MAX_LFS_FILESIZE) return -EFBIG; } return generic_file_open(vi, filp); Index: fs/ntfs/inode.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/inode.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/inode.c (mode:100644) @@ -1,7 +1,7 @@ /** * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -174,7 +174,7 @@ vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, (set_t)ntfs_init_locked_inode, &na); - if (!vi) + if (unlikely(!vi)) return ERR_PTR(-ENOMEM); err = 0; @@ -188,7 +188,7 @@ * There is no point in keeping bad inodes around if the failure was * due to ENOMEM. We want to be able to retry again later. */ - if (err == -ENOMEM) { + if (unlikely(err == -ENOMEM)) { iput(vi); vi = ERR_PTR(err); } @@ -235,7 +235,7 @@ vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, (set_t)ntfs_init_locked_inode, &na); - if (!vi) + if (unlikely(!vi)) return ERR_PTR(-ENOMEM); err = 0; @@ -250,7 +250,7 @@ * simplifies things in that we never need to check for bad attribute * inodes elsewhere. */ - if (err) { + if (unlikely(err)) { iput(vi); vi = ERR_PTR(err); } @@ -290,7 +290,7 @@ vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, (set_t)ntfs_init_locked_inode, &na); - if (!vi) + if (unlikely(!vi)) return ERR_PTR(-ENOMEM); err = 0; @@ -305,7 +305,7 @@ * simplifies things in that we never need to check for bad index * inodes elsewhere. */ - if (err) { + if (unlikely(err)) { iput(vi); vi = ERR_PTR(err); } @@ -376,6 +376,7 @@ void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) { ntfs_debug("Entering."); + rwlock_init(&ni->size_lock); ni->initialized_size = ni->allocated_size = 0; ni->seq_no = 0; atomic_set(&ni->count, 1); @@ -524,6 +525,7 @@ ntfs_volume *vol = NTFS_SB(vi->i_sb); ntfs_inode *ni; MFT_RECORD *m; + ATTR_RECORD *a; STANDARD_INFORMATION *si; ntfs_attr_search_ctx *ctx; int err = 0; @@ -632,9 +634,10 @@ } goto unm_err_out; } + a = ctx->attr; /* Get the standard information attribute value. */ - si = (STANDARD_INFORMATION*)((char*)ctx->attr + - le16_to_cpu(ctx->attr->data.resident.value_offset)); + si = (STANDARD_INFORMATION*)((u8*)a + + le16_to_cpu(a->data.resident.value_offset)); /* Transfer information from the standard information into vi. */ /* @@ -673,15 +676,16 @@ goto skip_attr_list_load; ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); NInoSetAttrList(ni); - if (ctx->attr->flags & ATTR_IS_ENCRYPTED || - ctx->attr->flags & ATTR_COMPRESSION_MASK || - ctx->attr->flags & ATTR_IS_SPARSE) { + a = ctx->attr; + if (a->flags & ATTR_IS_ENCRYPTED || + a->flags & ATTR_COMPRESSION_MASK || + a->flags & ATTR_IS_SPARSE) { ntfs_error(vi->i_sb, "Attribute list attribute is " "compressed/encrypted/sparse."); goto unm_err_out; } /* Now allocate memory for the attribute list. */ - ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); + ni->attr_list_size = (u32)ntfs_attr_size(a); ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(vi->i_sb, "Not enough memory to allocate " @@ -689,9 +693,9 @@ err = -ENOMEM; goto unm_err_out; } - if (ctx->attr->non_resident) { + if (a->non_resident) { NInoSetAttrListNonResident(ni); - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "Attribute list has non " "zero lowest_vcn."); goto unm_err_out; @@ -701,7 +705,7 @@ * exclusive access to the inode at this time. */ ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, - ctx->attr, NULL); + a, NULL); if (IS_ERR(ni->attr_list_rl.rl)) { err = PTR_ERR(ni->attr_list_rl.rl); ni->attr_list_rl.rl = NULL; @@ -712,27 +716,26 @@ /* Now load the attribute list. */ if ((err = load_attribute_list(vol, &ni->attr_list_rl, ni->attr_list, ni->attr_list_size, - sle64_to_cpu(ctx->attr->data. - non_resident.initialized_size)))) { + sle64_to_cpu(a->data.non_resident. + initialized_size)))) { ntfs_error(vi->i_sb, "Failed to load " "attribute list attribute."); goto unm_err_out; } - } else /* if (!ctx.attr->non_resident) */ { - if ((u8*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset) + - le32_to_cpu( - ctx->attr->data.resident.value_length) > + } else /* if (!a->non_resident) */ { + if ((u8*)a + le16_to_cpu(a->data.resident.value_offset) + + le32_to_cpu( + a->data.resident.value_length) > (u8*)ctx->mrec + vol->mft_record_size) { ntfs_error(vi->i_sb, "Corrupt attribute list " "in inode."); goto unm_err_out; } /* Now copy the attribute list. */ - memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset), + memcpy(ni->attr_list, (u8*)a + le16_to_cpu( + a->data.resident.value_offset), le32_to_cpu( - ctx->attr->data.resident.value_length)); + a->data.resident.value_length)); } } skip_attr_list_load: @@ -741,10 +744,11 @@ * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. */ if (S_ISDIR(vi->i_mode)) { + loff_t bvi_size; struct inode *bvi; ntfs_inode *bni; INDEX_ROOT *ir; - char *ir_end, *index_end; + u8 *ir_end, *index_end; /* It is a directory, find index root attribute. */ ntfs_attr_reinit_search_ctx(ctx); @@ -760,17 +764,16 @@ } goto unm_err_out; } + a = ctx->attr; /* Set up the state. */ - if (unlikely(ctx->attr->non_resident)) { + if (unlikely(a->non_resident)) { ntfs_error(vol->sb, "$INDEX_ROOT attribute is not " "resident."); goto unm_err_out; } /* Ensure the attribute name is placed before the value. */ - if (unlikely(ctx->attr->name_length && - (le16_to_cpu(ctx->attr->name_offset) >= - le16_to_cpu(ctx->attr->data.resident. - value_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu(a->data.resident.value_offset)))) { ntfs_error(vol->sb, "$INDEX_ROOT attribute name is " "placed after the attribute value."); goto unm_err_out; @@ -781,28 +784,27 @@ * encrypted. However index root cannot be both compressed and * encrypted. */ - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) + if (a->flags & ATTR_COMPRESSION_MASK) NInoSetCompressed(ni); - if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (a->flags & ATTR_IS_ENCRYPTED) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(vi->i_sb, "Found encrypted and " "compressed attribute."); goto unm_err_out; } NInoSetEncrypted(ni); } - if (ctx->attr->flags & ATTR_IS_SPARSE) + if (a->flags & ATTR_IS_SPARSE) NInoSetSparse(ni); - ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset)); - ir_end = (char*)ir + le32_to_cpu( - ctx->attr->data.resident.value_length); - if (ir_end > (char*)ctx->mrec + vol->mft_record_size) { + ir = (INDEX_ROOT*)((u8*)a + + le16_to_cpu(a->data.resident.value_offset)); + ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length); + if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) { ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " "corrupt."); goto unm_err_out; } - index_end = (char*)&ir->index + + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); if (index_end > ir_end) { ntfs_error(vi->i_sb, "Directory index is corrupt."); @@ -889,7 +891,8 @@ "attribute."); goto unm_err_out; } - if (!ctx->attr->non_resident) { + a = ctx->attr; + if (!a->non_resident) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " "is resident."); goto unm_err_out; @@ -898,42 +901,40 @@ * Ensure the attribute name is placed before the mapping pairs * array. */ - if (unlikely(ctx->attr->name_length && - (le16_to_cpu(ctx->attr->name_offset) >= - le16_to_cpu(ctx->attr->data.non_resident. - mapping_pairs_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu( + a->data.non_resident.mapping_pairs_offset)))) { ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name " "is placed after the mapping pairs " "array."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (a->flags & ATTR_IS_ENCRYPTED) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " "is encrypted."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_IS_SPARSE) { + if (a->flags & ATTR_IS_SPARSE) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " "is sparse."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " "is compressed."); goto unm_err_out; } - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of " "$INDEX_ALLOCATION attribute has non " "zero lowest_vcn."); goto unm_err_out; } - vi->i_size = sle64_to_cpu( - ctx->attr->data.non_resident.data_size); + vi->i_size = sle64_to_cpu(a->data.non_resident.data_size); ni->initialized_size = sle64_to_cpu( - ctx->attr->data.non_resident.initialized_size); + a->data.non_resident.initialized_size); ni->allocated_size = sle64_to_cpu( - ctx->attr->data.non_resident.allocated_size); + a->data.non_resident.allocated_size); /* * We are done with the mft record, so we release it. Otherwise * we would deadlock in ntfs_attr_iget(). @@ -958,11 +959,12 @@ goto unm_err_out; } /* Consistency check bitmap size vs. index allocation size. */ - if ((bvi->i_size << 3) < (vi->i_size >> + bvi_size = i_size_read(bvi); + if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) { ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) " "for index allocation (0x%llx).", - bvi->i_size << 3, vi->i_size); + bvi_size << 3, vi->i_size); goto unm_err_out; } skip_large_dir_stuff: @@ -1010,87 +1012,92 @@ ntfs_error(vi->i_sb, "$DATA attribute is missing."); goto unm_err_out; } + a = ctx->attr; /* Setup the state. */ - if (ctx->attr->non_resident) { + if (a->non_resident) { NInoSetNonResident(ni); - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { - NInoSetCompressed(ni); - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found " - "compressed data but " - "compression is disabled due " - "to cluster size (%i) > 4kiB.", - vol->cluster_size); - goto unm_err_out; - } - if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) - != ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found " - "unknown compression method or " - "corrupt file."); - goto unm_err_out; + if (a->flags & (ATTR_COMPRESSION_MASK | + ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " + "compressed data but " + "compression is " + "disabled due to " + "cluster size (%i) > " + "4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found " + "unknown compression " + "method or corrupt " + "file."); + goto unm_err_out; + } } - ni->itype.compressed.block_clusters = 1U << - ctx->attr->data.non_resident. - compression_unit; - if (ctx->attr->data.non_resident. - compression_unit != 4) { + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + if (a->data.non_resident.compression_unit != + 4) { ntfs_error(vi->i_sb, "Found " "nonstandard compression unit " "(%u instead of 4). Cannot " "handle this.", - ctx->attr->data.non_resident. + a->data.non_resident. compression_unit); err = -EOPNOTSUPP; goto unm_err_out; } + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident. + compression_unit; ni->itype.compressed.block_size = 1U << ( - ctx->attr->data.non_resident. + a->data.non_resident. compression_unit + vol->cluster_size_bits); ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed.block_size) - 1; + ni->itype.compressed. + block_size) - 1; + ni->itype.compressed.size = sle64_to_cpu( + a->data.non_resident. + compressed_size); } - if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (a->flags & ATTR_IS_ENCRYPTED) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(vi->i_sb, "Found encrypted " "and compressed data."); goto unm_err_out; } NInoSetEncrypted(ni); } - if (ctx->attr->flags & ATTR_IS_SPARSE) - NInoSetSparse(ni); - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of $DATA " "attribute has non zero " "lowest_vcn."); goto unm_err_out; } - /* Setup all the sizes. */ vi->i_size = sle64_to_cpu( - ctx->attr->data.non_resident.data_size); + a->data.non_resident.data_size); ni->initialized_size = sle64_to_cpu( - ctx->attr->data.non_resident. - initialized_size); + a->data.non_resident.initialized_size); ni->allocated_size = sle64_to_cpu( - ctx->attr->data.non_resident. - allocated_size); - if (NInoCompressed(ni)) { - ni->itype.compressed.size = sle64_to_cpu( - ctx->attr->data.non_resident. - compressed_size); - } + a->data.non_resident.allocated_size); } else { /* Resident attribute. */ - /* - * Make all sizes equal for simplicity in read code - * paths. FIXME: Need to keep this in mind when - * converting to non-resident attribute in write code - * path. (Probably only affects truncate().) - */ - vi->i_size = ni->initialized_size = ni->allocated_size = - le32_to_cpu( - ctx->attr->data.resident.value_length); + vi->i_size = ni->initialized_size = le32_to_cpu( + a->data.resident.value_length); + ni->allocated_size = le32_to_cpu(a->length) - + le16_to_cpu( + a->data.resident.value_offset); + if (vi->i_size > ni->allocated_size) { + ntfs_error(vi->i_sb, "Resident data attribute " + "is corrupt (size exceeds " + "allocation)."); + goto unm_err_out; + } } no_data_attr_special_case: /* We are done with the mft record, so we release it. */ @@ -1117,11 +1124,10 @@ * sizes of all non-resident attributes present to give us the Linux * correct size that should go into i_blocks (after division by 512). */ - if (S_ISDIR(vi->i_mode) || !NInoCompressed(ni)) - vi->i_blocks = ni->allocated_size >> 9; - else + if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni))) vi->i_blocks = ni->itype.compressed.size >> 9; - + else + vi->i_blocks = ni->allocated_size >> 9; ntfs_debug("Done."); return 0; @@ -1166,6 +1172,7 @@ ntfs_volume *vol = NTFS_SB(vi->i_sb); ntfs_inode *ni, *base_ni; MFT_RECORD *m; + ATTR_RECORD *a; ntfs_attr_search_ctx *ctx; int err = 0; @@ -1200,24 +1207,21 @@ err = -ENOMEM; goto unm_err_out; } - /* Find the attribute. */ err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, CASE_SENSITIVE, 0, NULL, 0, ctx); if (unlikely(err)) goto unm_err_out; - - if (!ctx->attr->non_resident) { + a = ctx->attr; + if (!a->non_resident) { /* Ensure the attribute name is placed before the value. */ - if (unlikely(ctx->attr->name_length && - (le16_to_cpu(ctx->attr->name_offset) >= - le16_to_cpu(ctx->attr->data.resident. - value_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu(a->data.resident.value_offset)))) { ntfs_error(vol->sb, "Attribute name is placed after " "the attribute value."); goto unm_err_out; } - if (NInoMstProtected(ni) || ctx->attr->flags) { + if (NInoMstProtected(ni) || a->flags) { ntfs_error(vi->i_sb, "Found mst protected attribute " "or attribute with non-zero flags but " "the attribute is resident. Please " @@ -1225,85 +1229,95 @@ "linux-ntfs-dev@lists.sourceforge.net"); goto unm_err_out; } - /* - * Resident attribute. Make all sizes equal for simplicity in - * read code paths. - */ - vi->i_size = ni->initialized_size = ni->allocated_size = - le32_to_cpu(ctx->attr->data.resident.value_length); + vi->i_size = ni->initialized_size = le32_to_cpu( + a->data.resident.value_length); + ni->allocated_size = le32_to_cpu(a->length) - + le16_to_cpu(a->data.resident.value_offset); + if (vi->i_size > ni->allocated_size) { + ntfs_error(vi->i_sb, "Resident attribute is corrupt " + "(size exceeds allocation)."); + goto unm_err_out; + } } else { NInoSetNonResident(ni); /* * Ensure the attribute name is placed before the mapping pairs * array. */ - if (unlikely(ctx->attr->name_length && - (le16_to_cpu(ctx->attr->name_offset) >= - le16_to_cpu(ctx->attr->data.non_resident. - mapping_pairs_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu( + a->data.non_resident.mapping_pairs_offset)))) { ntfs_error(vol->sb, "Attribute name is placed after " "the mapping pairs array."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) { + if (a->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if ((ni->type != AT_DATA) || (ni->type == + AT_DATA && ni->name_len)) { + ntfs_error(vi->i_sb, "Found compressed " + "non-data or named " + "data attribute. " + "Please report you " + "saw this message to " + "linux-ntfs-dev@lists." + "sourceforge.net"); + goto unm_err_out; + } + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found compressed " + "attribute but " + "compression is " + "disabled due to " + "cluster size (%i) > " + "4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((a->flags & ATTR_COMPRESSION_MASK) != + ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method."); + goto unm_err_out; + } + } if (NInoMstProtected(ni)) { ntfs_error(vi->i_sb, "Found mst protected " "attribute but the attribute " - "is compressed. Please report " - "you saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - NInoSetCompressed(ni); - if ((ni->type != AT_DATA) || (ni->type == AT_DATA && - ni->name_len)) { - ntfs_error(vi->i_sb, "Found compressed " - "non-data or named data " - "attribute. Please report " - "you saw this message to " + "is %s. Please report you " + "saw this message to " "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - if (vol->cluster_size > 4096) { - ntfs_error(vi->i_sb, "Found compressed " - "attribute but compression is " - "disabled due to cluster size " - "(%i) > 4kiB.", - vol->cluster_size); + "sourceforge.net", + NInoCompressed(ni) ? + "compressed" : "sparse"); goto unm_err_out; } - if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) - != ATTR_IS_COMPRESSED) { - ntfs_error(vi->i_sb, "Found unknown " - "compression method."); - goto unm_err_out; - } - ni->itype.compressed.block_clusters = 1U << - ctx->attr->data.non_resident. - compression_unit; - if (ctx->attr->data.non_resident.compression_unit != - 4) { + if (a->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found nonstandard " "compression unit (%u instead " "of 4). Cannot handle this.", - ctx->attr->data.non_resident. + a->data.non_resident. compression_unit); err = -EOPNOTSUPP; goto unm_err_out; } + ni->itype.compressed.block_clusters = 1U << + a->data.non_resident.compression_unit; ni->itype.compressed.block_size = 1U << ( - ctx->attr->data.non_resident. - compression_unit + + a->data.non_resident.compression_unit + vol->cluster_size_bits); ni->itype.compressed.block_size_bits = ffs( - ni->itype.compressed.block_size) - 1; + ni->itype.compressed.block_size) - 1; + ni->itype.compressed.size = sle64_to_cpu( + a->data.non_resident.compressed_size); } - if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { - ntfs_error(vi->i_sb, "Found encrypted " - "and compressed data."); + if (a->flags & ATTR_IS_ENCRYPTED) { + if (a->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted and " + "compressed data."); goto unm_err_out; } if (NInoMstProtected(ni)) { @@ -1317,37 +1331,17 @@ } NInoSetEncrypted(ni); } - if (ctx->attr->flags & ATTR_IS_SPARSE) { - if (NInoMstProtected(ni)) { - ntfs_error(vi->i_sb, "Found mst protected " - "attribute but the attribute " - "is sparse. Please report " - "you saw this message to " - "linux-ntfs-dev@lists." - "sourceforge.net"); - goto unm_err_out; - } - NInoSetSparse(ni); - } - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of attribute has " "non-zero lowest_vcn."); goto unm_err_out; } - /* Setup all the sizes. */ - vi->i_size = sle64_to_cpu( - ctx->attr->data.non_resident.data_size); + vi->i_size = sle64_to_cpu(a->data.non_resident.data_size); ni->initialized_size = sle64_to_cpu( - ctx->attr->data.non_resident.initialized_size); + a->data.non_resident.initialized_size); ni->allocated_size = sle64_to_cpu( - ctx->attr->data.non_resident.allocated_size); - if (NInoCompressed(ni)) { - ni->itype.compressed.size = sle64_to_cpu( - ctx->attr->data.non_resident. - compressed_size); - } + a->data.non_resident.allocated_size); } - /* Setup the operations for this attribute inode. */ vi->i_op = NULL; vi->i_fop = NULL; @@ -1355,12 +1349,10 @@ vi->i_mapping->a_ops = &ntfs_mst_aops; else vi->i_mapping->a_ops = &ntfs_aops; - - if (!NInoCompressed(ni)) - vi->i_blocks = ni->allocated_size >> 9; - else + if (NInoCompressed(ni) || NInoSparse(ni)) vi->i_blocks = ni->itype.compressed.size >> 9; - + else + vi->i_blocks = ni->allocated_size >> 9; /* * Make sure the base inode doesn't go away and attach it to the * attribute inode. @@ -1429,10 +1421,12 @@ */ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) { + loff_t bvi_size; ntfs_volume *vol = NTFS_SB(vi->i_sb); ntfs_inode *ni, *base_ni, *bni; struct inode *bvi; MFT_RECORD *m; + ATTR_RECORD *a; ntfs_attr_search_ctx *ctx; INDEX_ROOT *ir; u8 *ir_end, *index_end; @@ -1474,30 +1468,28 @@ "missing."); goto unm_err_out; } + a = ctx->attr; /* Set up the state. */ - if (unlikely(ctx->attr->non_resident)) { + if (unlikely(a->non_resident)) { ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident."); goto unm_err_out; } /* Ensure the attribute name is placed before the value. */ - if (unlikely(ctx->attr->name_length && - (le16_to_cpu(ctx->attr->name_offset) >= - le16_to_cpu(ctx->attr->data.resident. - value_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu(a->data.resident.value_offset)))) { ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed " "after the attribute value."); goto unm_err_out; } /* Compressed/encrypted/sparse index root is not allowed. */ - if (ctx->attr->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | + if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED | ATTR_IS_SPARSE)) { ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index " "root attribute."); goto unm_err_out; } - ir = (INDEX_ROOT*)((u8*)ctx->attr + - le16_to_cpu(ctx->attr->data.resident.value_offset)); - ir_end = (u8*)ir + le32_to_cpu(ctx->attr->data.resident.value_length); + ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset)); + ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length); if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) { ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt."); goto unm_err_out; @@ -1570,7 +1562,7 @@ "$INDEX_ALLOCATION attribute."); goto unm_err_out; } - if (!ctx->attr->non_resident) { + if (!a->non_resident) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " "resident."); goto unm_err_out; @@ -1578,37 +1570,36 @@ /* * Ensure the attribute name is placed before the mapping pairs array. */ - if (unlikely(ctx->attr->name_length && (le16_to_cpu( - ctx->attr->name_offset) >= le16_to_cpu( - ctx->attr->data.non_resident.mapping_pairs_offset)))) { + if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >= + le16_to_cpu( + a->data.non_resident.mapping_pairs_offset)))) { ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is " "placed after the mapping pairs array."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (a->flags & ATTR_IS_ENCRYPTED) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " "encrypted."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_IS_SPARSE) { + if (a->flags & ATTR_IS_SPARSE) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse."); goto unm_err_out; } - if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (a->flags & ATTR_COMPRESSION_MASK) { ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is " "compressed."); goto unm_err_out; } - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION " "attribute has non zero lowest_vcn."); goto unm_err_out; } - vi->i_size = sle64_to_cpu(ctx->attr->data.non_resident.data_size); + vi->i_size = sle64_to_cpu(a->data.non_resident.data_size); ni->initialized_size = sle64_to_cpu( - ctx->attr->data.non_resident.initialized_size); - ni->allocated_size = sle64_to_cpu( - ctx->attr->data.non_resident.allocated_size); + a->data.non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size); /* * We are done with the mft record, so we release it. Otherwise * we would deadlock in ntfs_attr_iget(). @@ -1632,10 +1623,10 @@ goto iput_unm_err_out; } /* Consistency check bitmap size vs. index allocation size. */ - if ((bvi->i_size << 3) < (vi->i_size >> - ni->itype.index.block_size_bits)) { + bvi_size = i_size_read(bvi); + if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) { ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for " - "index allocation (0x%llx).", bvi->i_size << 3, + "index allocation (0x%llx).", bvi_size << 3, vi->i_size); goto iput_unm_err_out; } @@ -1646,7 +1637,6 @@ vi->i_fop = NULL; vi->i_mapping->a_ops = &ntfs_mst_aops; vi->i_blocks = ni->allocated_size >> 9; - /* * Make sure the base inode doesn't go away and attach it to the * index inode. @@ -1712,7 +1702,7 @@ struct buffer_head *bh; ntfs_inode *ni; MFT_RECORD *m = NULL; - ATTR_RECORD *attr; + ATTR_RECORD *a; ntfs_attr_search_ctx *ctx; unsigned int i, nr_blocks; int err; @@ -1727,10 +1717,10 @@ /* Setup the data attribute. It is special as it is mst protected. */ NInoSetNonResident(ni); NInoSetMstProtected(ni); + NInoSetSparseDisabled(ni); ni->type = AT_DATA; ni->name = NULL; ni->name_len = 0; - /* * This sets up our little cheat allowing us to reuse the async read io * completion handler for directories. @@ -1808,9 +1798,10 @@ ntfs_debug("Attribute list attribute found in $MFT."); NInoSetAttrList(ni); - if (ctx->attr->flags & ATTR_IS_ENCRYPTED || - ctx->attr->flags & ATTR_COMPRESSION_MASK || - ctx->attr->flags & ATTR_IS_SPARSE) { + a = ctx->attr; + if (a->flags & ATTR_IS_ENCRYPTED || + a->flags & ATTR_COMPRESSION_MASK || + a->flags & ATTR_IS_SPARSE) { ntfs_error(sb, "Attribute list attribute is " "compressed/encrypted/sparse. Not " "allowed. $MFT is corrupt. You should " @@ -1818,16 +1809,16 @@ goto put_err_out; } /* Now allocate memory for the attribute list. */ - ni->attr_list_size = (u32)ntfs_attr_size(ctx->attr); + ni->attr_list_size = (u32)ntfs_attr_size(a); ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); if (!ni->attr_list) { ntfs_error(sb, "Not enough memory to allocate buffer " "for attribute list."); goto put_err_out; } - if (ctx->attr->non_resident) { + if (a->non_resident) { NInoSetAttrListNonResident(ni); - if (ctx->attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(sb, "Attribute list has non zero " "lowest_vcn. $MFT is corrupt. " "You should run chkdsk."); @@ -1835,7 +1826,7 @@ } /* Setup the runlist. */ ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol, - ctx->attr, NULL); + a, NULL); if (IS_ERR(ni->attr_list_rl.rl)) { err = PTR_ERR(ni->attr_list_rl.rl); ni->attr_list_rl.rl = NULL; @@ -1847,7 +1838,7 @@ /* Now load the attribute list. */ if ((err = load_attribute_list(vol, &ni->attr_list_rl, ni->attr_list, ni->attr_list_size, - sle64_to_cpu(ctx->attr->data. + sle64_to_cpu(a->data. non_resident.initialized_size)))) { ntfs_error(sb, "Failed to load attribute list " "attribute with error code %i.", @@ -1855,20 +1846,20 @@ goto put_err_out; } } else /* if (!ctx.attr->non_resident) */ { - if ((u8*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset) + + if ((u8*)a + le16_to_cpu( + a->data.resident.value_offset) + le32_to_cpu( - ctx->attr->data.resident.value_length) > + a->data.resident.value_length) > (u8*)ctx->mrec + vol->mft_record_size) { ntfs_error(sb, "Corrupt attribute list " "attribute."); goto put_err_out; } /* Now copy the attribute list. */ - memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( - ctx->attr->data.resident.value_offset), + memcpy(ni->attr_list, (u8*)a + le16_to_cpu( + a->data.resident.value_offset), le32_to_cpu( - ctx->attr->data.resident.value_length)); + a->data.resident.value_length)); } /* The attribute list is now setup in memory. */ /* @@ -1934,25 +1925,25 @@ ntfs_attr_reinit_search_ctx(ctx); /* Now load all attribute extents. */ - attr = NULL; + a = NULL; next_vcn = last_vcn = highest_vcn = 0; while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, ctx))) { runlist_element *nrl; /* Cache the current attribute. */ - attr = ctx->attr; + a = ctx->attr; /* $MFT must be non-resident. */ - if (!attr->non_resident) { + if (!a->non_resident) { ntfs_error(sb, "$MFT must be non-resident but a " "resident extent was found. $MFT is " "corrupt. Run chkdsk."); goto put_err_out; } /* $MFT must be uncompressed and unencrypted. */ - if (attr->flags & ATTR_COMPRESSION_MASK || - attr->flags & ATTR_IS_ENCRYPTED || - attr->flags & ATTR_IS_SPARSE) { + if (a->flags & ATTR_COMPRESSION_MASK || + a->flags & ATTR_IS_ENCRYPTED || + a->flags & ATTR_IS_SPARSE) { ntfs_error(sb, "$MFT must be uncompressed, " "non-sparse, and unencrypted but a " "compressed/sparse/encrypted extent " @@ -1966,7 +1957,7 @@ * as we have exclusive access to the inode at this time and we * are a mount in progress task, too. */ - nrl = ntfs_mapping_pairs_decompress(vol, attr, ni->runlist.rl); + nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl); if (IS_ERR(nrl)) { ntfs_error(sb, "ntfs_mapping_pairs_decompress() " "failed with error code %ld. $MFT is " @@ -1977,7 +1968,7 @@ /* Are we in the first extent? */ if (!next_vcn) { - if (attr->data.non_resident.lowest_vcn) { + if (a->data.non_resident.lowest_vcn) { ntfs_error(sb, "First extent of $DATA " "attribute has non zero " "lowest_vcn. $MFT is corrupt. " @@ -1986,15 +1977,15 @@ } /* Get the last vcn in the $DATA attribute. */ last_vcn = sle64_to_cpu( - attr->data.non_resident.allocated_size) + a->data.non_resident.allocated_size) >> vol->cluster_size_bits; /* Fill in the inode size. */ vi->i_size = sle64_to_cpu( - attr->data.non_resident.data_size); - ni->initialized_size = sle64_to_cpu(attr->data. - non_resident.initialized_size); + a->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + a->data.non_resident.initialized_size); ni->allocated_size = sle64_to_cpu( - attr->data.non_resident.allocated_size); + a->data.non_resident.allocated_size); /* * Verify the number of mft records does not exceed * 2^32 - 1. @@ -2051,7 +2042,7 @@ } /* Get the lowest vcn for the next extent. */ - highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); + highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn); next_vcn = highest_vcn + 1; /* Only one extent or error, which we catch below. */ @@ -2060,7 +2051,7 @@ /* Avoid endless loops due to corruption. */ if (next_vcn < sle64_to_cpu( - attr->data.non_resident.lowest_vcn)) { + a->data.non_resident.lowest_vcn)) { ntfs_error(sb, "$MFT has corrupt attribute list " "attribute. Run chkdsk."); goto put_err_out; @@ -2071,7 +2062,7 @@ "$MFT is corrupt. Run chkdsk."); goto put_err_out; } - if (!attr) { + if (!a) { ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is " "corrupt. Run chkdsk."); goto put_err_out; @@ -2275,6 +2266,8 @@ seq_printf(sf, ",case_sensitive"); if (NVolShowSystemFiles(vol)) seq_printf(sf, ",show_sys_files"); + if (!NVolSparseEnabled(vol)) + seq_printf(sf, ",disable_sparse"); for (i = 0; on_errors_arr[i].val; i++) { if (on_errors_arr[i].val & vol->on_errors) seq_printf(sf, ",errors=%s", on_errors_arr[i].str); @@ -2311,6 +2304,7 @@ ntfs_volume *vol = ni->vol; ntfs_attr_search_ctx *ctx; MFT_RECORD *m; + ATTR_RECORD *a; const char *te = " Leaving file length out of sync with i_size."; int err; @@ -2347,14 +2341,15 @@ vi->i_ino, err); goto err_out; } + a = ctx->attr; /* If the size has not changed there is nothing to do. */ - if (ntfs_attr_size(ctx->attr) == i_size_read(vi)) + if (ntfs_attr_size(a) == i_size_read(vi)) goto done; // TODO: Implement the truncate... ntfs_error(vi->i_sb, "Inode size has changed but this is not " "implemented yet. Resetting inode size to old value. " " This is most likely a bug in the ntfs driver!"); - i_size_write(vi, ntfs_attr_size(ctx->attr)); + i_size_write(vi, ntfs_attr_size(a)); done: ntfs_attr_put_search_ctx(ctx); unmap_mft_record(ni); @@ -2515,18 +2510,18 @@ nt = utc2ntfs(vi->i_mtime); if (si->last_data_change_time != nt) { ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, " - "new = 0x%llx", vi->i_ino, + "new = 0x%llx", vi->i_ino, (long long) sle64_to_cpu(si->last_data_change_time), - sle64_to_cpu(nt)); + (long long)sle64_to_cpu(nt)); si->last_data_change_time = nt; modified = TRUE; } nt = utc2ntfs(vi->i_ctime); if (si->last_mft_change_time != nt) { ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, " - "new = 0x%llx", vi->i_ino, + "new = 0x%llx", vi->i_ino, (long long) sle64_to_cpu(si->last_mft_change_time), - sle64_to_cpu(nt)); + (long long)sle64_to_cpu(nt)); si->last_mft_change_time = nt; modified = TRUE; } @@ -2534,8 +2529,8 @@ if (si->last_access_time != nt) { ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, " "new = 0x%llx", vi->i_ino, - sle64_to_cpu(si->last_access_time), - sle64_to_cpu(nt)); + (long long)sle64_to_cpu(si->last_access_time), + (long long)sle64_to_cpu(nt)); si->last_access_time = nt; modified = TRUE; } Index: fs/ntfs/inode.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/inode.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/inode.h (mode:100644) @@ -2,7 +2,7 @@ * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of * the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -44,6 +44,7 @@ * fields already provided in the VFS inode. */ struct _ntfs_inode { + rwlock_t size_lock; /* Lock serializing access to inode sizes. */ s64 initialized_size; /* Copy from the attribute record. */ s64 allocated_size; /* Copy from the attribute record. */ unsigned long state; /* NTFS specific flags describing this inode. @@ -165,6 +166,7 @@ NI_Sparse, /* 1: Unnamed data attr is sparse (f). 1: Create sparse files by default (d). 1: Attribute is sparse (a). */ + NI_SparseDisabled, /* 1: May not create sparse regions. */ NI_TruncateFailed, /* 1: Last ntfs_truncate() call failed. */ } ntfs_inode_state_bits; @@ -217,6 +219,7 @@ NINO_FNS(Compressed) NINO_FNS(Encrypted) NINO_FNS(Sparse) +NINO_FNS(SparseDisabled) NINO_FNS(TruncateFailed) /* Index: fs/ntfs/layout.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/layout.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/layout.h (mode:100644) @@ -2,7 +2,7 @@ * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS * project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -547,26 +547,44 @@ COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), - COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13) + COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), }; typedef le32 COLLATION_RULE; /* * The flags (32-bit) describing attribute properties in the attribute - * definition structure. FIXME: This information is from Regis's information - * and, according to him, it is not certain and probably incomplete. - * The INDEXABLE flag is fairly certainly correct as only the file name - * attribute has this flag set and this is the only attribute indexed in NT4. - */ -enum { - INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be - indexed. */ - NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate - during regeneration - phase. */ - CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be - non-resident. */ + * definition structure. FIXME: This information is based on Regis's + * information and, according to him, it is not certain and probably + * incomplete. The INDEXABLE flag is fairly certainly correct as only the file + * name attribute has this flag set and this is the only attribute indexed in + * NT4. + */ +enum { + ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be + indexed. */ + ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type + can be present multiple times in the + mft records of an inode. */ + ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value + must contain at least one non-zero + byte. */ + ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be + indexed and the attribute value must be + unique for the attribute type in all of + the mft records of an inode. */ + ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be + named and the name must be unique for + the attribute type in all of the mft + records of an inode. */ + ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be + resident. */ + ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log + modifications to this attribute, + regardless of whether it is resident or + non-resident. Without this, only log + modifications if the attribute is + resident. */ }; typedef le32 ATTR_DEF_FLAGS; @@ -749,10 +767,11 @@ record header aligned to 8-byte boundary. */ /* 34*/ u8 compression_unit; /* The compression unit expressed as the log to the base 2 of the number of - clusters in a compression unit. 0 means not - compressed. (This effectively limits the + clusters in a compression unit. 0 means not + compressed. (This effectively limits the compression unit size to be a power of two - clusters.) WinNT4 only uses a value of 4. */ + clusters.) WinNT4 only uses a value of 4. + Sparse files also have this set to 4. */ /* 35*/ u8 reserved[5]; /* Align to 8-byte boundary. */ /* The sizes below are only used when lowest_vcn is zero, as otherwise it would be difficult to keep them up-to-date.*/ @@ -772,10 +791,10 @@ data_size. */ /* sizeof(uncompressed attr) = 64*/ /* 64*/ sle64 compressed_size; /* Byte size of the attribute - value after compression. Only present when - compressed. Always is a multiple of the - cluster size. Represents the actual amount of - disk space being used on the disk. */ + value after compression. Only present when + compressed or sparse. Always is a multiple of + the cluster size. Represents the actual amount + of disk space being used on the disk. */ /* sizeof(compressed attr) = 72*/ } __attribute__ ((__packed__)) non_resident; } __attribute__ ((__packed__)) data; @@ -834,7 +853,7 @@ /* Note, this is a copy of the corresponding bit from the mft record, telling us whether this file has a view index present (eg. object id index, quota index, one of the security indexes or the encrypting - file system related indexes). */ + filesystem related indexes). */ }; typedef le32 FILE_ATTR_FLAGS; Index: fs/ntfs/lcnalloc.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/lcnalloc.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/lcnalloc.c (mode:100644) @@ -1,7 +1,7 @@ /* * lcnalloc.c - Cluster (de)allocation code. Part of the Linux-NTFS project. * - * Copyright (c) 2004 Anton Altaparmakov + * Copyright (c) 2004-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -60,7 +60,7 @@ if (rl->lcn < 0) continue; err = ntfs_bitmap_clear_run(lcnbmp_vi, rl->lcn, rl->length); - if (unlikely(err && (!ret || ret == ENOMEM) && ret != err)) + if (unlikely(err && (!ret || ret == -ENOMEM) && ret != err)) ret = err; } ntfs_debug("Done."); @@ -140,6 +140,7 @@ LCN zone_start, zone_end, bmp_pos, bmp_initial_pos, last_read_pos, lcn; LCN prev_lcn = 0, prev_run_len = 0, mft_zone_size; s64 clusters; + loff_t i_size; struct inode *lcnbmp_vi; runlist_element *rl = NULL; struct address_space *mapping; @@ -249,6 +250,7 @@ clusters = count; rlpos = rlsize = 0; mapping = lcnbmp_vi->i_mapping; + i_size = i_size_read(lcnbmp_vi); while (1) { ntfs_debug("Start of outer while loop: done_zones 0x%x, " "search_zone %i, pass %i, zone_start 0x%llx, " @@ -263,7 +265,7 @@ last_read_pos = bmp_pos >> 3; ntfs_debug("last_read_pos 0x%llx.", (unsigned long long)last_read_pos); - if (last_read_pos > lcnbmp_vi->i_size) { + if (last_read_pos > i_size) { ntfs_debug("End of attribute reached. " "Skipping to zone_pass_done."); goto zone_pass_done; @@ -287,8 +289,8 @@ buf_size = last_read_pos & ~PAGE_CACHE_MASK; buf = page_address(page) + buf_size; buf_size = PAGE_CACHE_SIZE - buf_size; - if (unlikely(last_read_pos + buf_size > lcnbmp_vi->i_size)) - buf_size = lcnbmp_vi->i_size - last_read_pos; + if (unlikely(last_read_pos + buf_size > i_size)) + buf_size = i_size - last_read_pos; buf_size <<= 3; lcn = bmp_pos & 7; bmp_pos &= ~7; @@ -691,7 +693,7 @@ if (zone == MFT_ZONE || mft_zone_size <= 0) { ntfs_debug("No free clusters left, going to out."); /* Really no more space left on device. */ - err = ENOSPC; + err = -ENOSPC; goto out; } /* zone == DATA_ZONE && mft_zone_size > 0 */ ntfs_debug("Shrinking mft zone."); @@ -755,13 +757,13 @@ if (rl) { int err2; - if (err == ENOSPC) + if (err == -ENOSPC) ntfs_debug("Not enough space to complete allocation, " - "err ENOSPC, first free lcn 0x%llx, " + "err -ENOSPC, first free lcn 0x%llx, " "could allocate up to 0x%llx " "clusters.", (unsigned long long)rl[0].lcn, - (unsigned long long)count - clusters); + (unsigned long long)(count - clusters)); /* Deallocate all allocated clusters. */ ntfs_debug("Attempting rollback..."); err2 = ntfs_cluster_free_from_rl_nolock(vol, rl); @@ -773,10 +775,10 @@ } /* Free the runlist. */ ntfs_free(rl); - } else if (err == ENOSPC) - ntfs_debug("No space left at all, err = ENOSPC, " - "first free lcn = 0x%llx.", - (unsigned long long)vol->data1_zone_pos); + } else if (err == -ENOSPC) + ntfs_debug("No space left at all, err = -ENOSPC, first free " + "lcn = 0x%llx.", + (long long)vol->data1_zone_pos); up_write(&vol->lcnbmp_lock); return ERR_PTR(err); } @@ -846,8 +848,8 @@ total_freed = real_freed = 0; - /* This returns with ni->runlist locked for reading on success. */ - rl = ntfs_find_vcn(ni, start_vcn, FALSE); + down_read(&ni->runlist.lock); + rl = ntfs_attr_find_vcn_nolock(ni, start_vcn, FALSE); if (IS_ERR(rl)) { if (!is_rollback) ntfs_error(vol->sb, "Failed to find first runlist " @@ -861,7 +863,7 @@ ntfs_error(vol->sb, "First runlist element has " "invalid lcn, aborting."); err = -EIO; - goto unl_err_out; + goto err_out; } /* Find the starting cluster inside the run that needs freeing. */ delta = start_vcn - rl->vcn; @@ -879,7 +881,7 @@ if (!is_rollback) ntfs_error(vol->sb, "Failed to clear first run " "(error %i), aborting.", err); - goto unl_err_out; + goto err_out; } /* We have freed @to_free real clusters. */ real_freed = to_free; @@ -899,30 +901,15 @@ if (unlikely(rl->lcn < LCN_HOLE)) { VCN vcn; - /* - * Attempt to map runlist, dropping runlist lock for - * the duration. - */ + /* Attempt to map runlist. */ vcn = rl->vcn; - up_read(&ni->runlist.lock); - err = ntfs_map_runlist(ni, vcn); - if (err) { - if (!is_rollback) - ntfs_error(vol->sb, "Failed to map " - "runlist fragment."); - if (err == -EINVAL || err == -ENOENT) - err = -EIO; - goto err_out; - } - /* - * This returns with ni->runlist locked for reading on - * success. - */ - rl = ntfs_find_vcn(ni, vcn, FALSE); + rl = ntfs_attr_find_vcn_nolock(ni, vcn, FALSE); if (IS_ERR(rl)) { err = PTR_ERR(rl); if (!is_rollback) - ntfs_error(vol->sb, "Failed to find " + ntfs_error(vol->sb, "Failed to map " + "runlist fragment or " + "failed to find " "subsequent runlist " "element."); goto err_out; @@ -935,7 +922,7 @@ (unsigned long long) rl->lcn); err = -EIO; - goto unl_err_out; + goto err_out; } } /* The number of clusters in this run that need freeing. */ @@ -951,7 +938,7 @@ if (!is_rollback) ntfs_error(vol->sb, "Failed to clear " "subsequent run."); - goto unl_err_out; + goto err_out; } /* We have freed @to_free real clusters. */ real_freed += to_free; @@ -972,9 +959,8 @@ /* We are done. Return the number of actually freed clusters. */ ntfs_debug("Done."); return real_freed; -unl_err_out: - up_read(&ni->runlist.lock); err_out: + up_read(&ni->runlist.lock); if (is_rollback) return err; /* If no real clusters were freed, no need to rollback. */ Index: fs/ntfs/logfile.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/logfile.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/logfile.c (mode:100644) @@ -443,7 +443,7 @@ /* An empty $LogFile must have been clean before it got emptied. */ if (NVolLogFileEmpty(vol)) goto is_empty; - size = log_vi->i_size; + size = i_size_read(log_vi); /* Make sure the file doesn't exceed the maximum allowed size. */ if (size > MaxLogFileSize) size = MaxLogFileSize; @@ -689,7 +689,8 @@ if (!NVolLogFileEmpty(vol)) { int err; - err = ntfs_attr_set(NTFS_I(log_vi), 0, log_vi->i_size, 0xff); + err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi), + 0xff); if (unlikely(err)) { ntfs_error(vol->sb, "Failed to fill $LogFile with " "0xff bytes (error code %i).", err); Index: fs/ntfs/mft.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/mft.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/mft.c (mode:100644) @@ -1,7 +1,7 @@ /** * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -45,6 +45,7 @@ */ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) { + loff_t i_size; ntfs_volume *vol = ni->vol; struct inode *mft_vi = vol->mft_ino; struct page *page; @@ -60,13 +61,14 @@ index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + i_size = i_size_read(mft_vi); /* The maximum valid index into the page cache for $MFT's data. */ - end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; + end_index = i_size >> PAGE_CACHE_SHIFT; /* If the wanted index is out of bounds the mft record doesn't exist. */ if (unlikely(index >= end_index)) { - if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < - ofs + vol->mft_record_size) { + if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs + + vol->mft_record_size) { page = ERR_PTR(-ENOENT); ntfs_error(vol->sb, "Attemt to read mft record 0x%lx, " "which is beyond the end of the mft. " @@ -285,7 +287,7 @@ } unmap_mft_record(ni); ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. " + "reference! Corrupt filesystem. " "Run chkdsk."); return ERR_PTR(-EIO); } @@ -316,7 +318,7 @@ /* Verify the sequence number if it is present. */ if (seq_no && (le16_to_cpu(m->sequence_number) != seq_no)) { ntfs_error(base_ni->vol->sb, "Found stale extent mft " - "reference! Corrupt file system. Run chkdsk."); + "reference! Corrupt filesystem. Run chkdsk."); destroy_ni = TRUE; m = ERR_PTR(-EIO); goto unm_err_out; @@ -1121,6 +1123,7 @@ ntfs_inode *base_ni) { s64 pass_end, ll, data_pos, pass_start, ofs, bit; + unsigned long flags; struct address_space *mftbmp_mapping; u8 *buf, *byte; struct page *page; @@ -1134,9 +1137,13 @@ * Set the end of the pass making sure we do not overflow the mft * bitmap. */ + read_lock_irqsave(&NTFS_I(vol->mft_ino)->size_lock, flags); pass_end = NTFS_I(vol->mft_ino)->allocated_size >> vol->mft_record_size_bits; + read_unlock_irqrestore(&NTFS_I(vol->mft_ino)->size_lock, flags); + read_lock_irqsave(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); ll = NTFS_I(vol->mftbmp_ino)->initialized_size << 3; + read_unlock_irqrestore(&NTFS_I(vol->mftbmp_ino)->size_lock, flags); if (pass_end > ll) pass_end = ll; pass = 1; @@ -1263,6 +1270,7 @@ { LCN lcn; s64 ll; + unsigned long flags; struct page *page; ntfs_inode *mft_ni, *mftbmp_ni; runlist_element *rl, *rl2 = NULL; @@ -1284,17 +1292,20 @@ /* * Determine the last lcn of the mft bitmap. The allocated size of the * mft bitmap cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ - rl = ntfs_find_vcn(mftbmp_ni, (mftbmp_ni->allocated_size - 1) >> - vol->cluster_size_bits, TRUE); + down_write(&mftbmp_ni->runlist.lock); + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + ll = mftbmp_ni->allocated_size; + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + rl = ntfs_attr_find_vcn_nolock(mftbmp_ni, + (ll - 1) >> vol->cluster_size_bits, TRUE); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mftbmp_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft bitmap attribute."); - if (!IS_ERR(rl)) { - up_write(&mftbmp_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } @@ -1418,6 +1429,8 @@ // TODO: Deal with this by moving this extent to a new mft // record or by starting a new extent in a new mft record or by // moving other attributes out of this mft record. + // Note: It will need to be a special mft record and if none of + // those are available it gets rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft bitmap attribute " "extent. Cannot handle this yet."); @@ -1458,9 +1471,11 @@ } a = ctx->attr; } + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->allocated_size += vol->cluster_size; a->data.non_resident.allocated_size = cpu_to_sle64(mftbmp_ni->allocated_size); + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1476,7 +1491,9 @@ 0, ctx)) { ntfs_error(vol->sb, "Failed to find last attribute extent of " "mft bitmap attribute.%s", es); + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->allocated_size += vol->cluster_size; + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); up_write(&mftbmp_ni->runlist.lock); @@ -1550,6 +1567,7 @@ static int ntfs_mft_bitmap_extend_initialized_nolock(ntfs_volume *vol) { s64 old_data_size, old_initialized_size; + unsigned long flags; struct inode *mftbmp_vi; ntfs_inode *mft_ni, *mftbmp_ni; ntfs_attr_search_ctx *ctx; @@ -1583,7 +1601,8 @@ goto put_err_out; } a = ctx->attr; - old_data_size = mftbmp_vi->i_size; + write_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_size = i_size_read(mftbmp_vi); old_initialized_size = mftbmp_ni->initialized_size; /* * We can simply update the initialized_size before filling the space @@ -1593,11 +1612,12 @@ mftbmp_ni->initialized_size += 8; a->data.non_resident.initialized_size = cpu_to_sle64(mftbmp_ni->initialized_size); - if (mftbmp_ni->initialized_size > mftbmp_vi->i_size) { - mftbmp_vi->i_size = mftbmp_ni->initialized_size; + if (mftbmp_ni->initialized_size > old_data_size) { + i_size_write(mftbmp_vi, mftbmp_ni->initialized_size); a->data.non_resident.data_size = - cpu_to_sle64(mftbmp_vi->i_size); + cpu_to_sle64(mftbmp_ni->initialized_size); } + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1636,22 +1656,28 @@ goto err_out; } a = ctx->attr; + write_lock_irqsave(&mftbmp_ni->size_lock, flags); mftbmp_ni->initialized_size = old_initialized_size; a->data.non_resident.initialized_size = cpu_to_sle64(old_initialized_size); - if (mftbmp_vi->i_size != old_data_size) { - mftbmp_vi->i_size = old_data_size; + if (i_size_read(mftbmp_vi) != old_data_size) { + i_size_write(mftbmp_vi, old_data_size); a->data.non_resident.data_size = cpu_to_sle64(old_data_size); } + write_unlock_irqrestore(&mftbmp_ni->size_lock, flags); flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Restored status of mftbmp: allocated_size 0x%llx, " "data_size 0x%llx, initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)mftbmp_vi->i_size, + (long long)i_size_read(mftbmp_vi), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ err_out: return ret; } @@ -1679,7 +1705,8 @@ { LCN lcn; VCN old_last_vcn; - s64 min_nr, nr, ll = 0; + s64 min_nr, nr, ll; + unsigned long flags; ntfs_inode *mft_ni; runlist_element *rl, *rl2; ntfs_attr_search_ctx *ctx = NULL; @@ -1695,23 +1722,25 @@ * Determine the preferred allocation location, i.e. the last lcn of * the mft data attribute. The allocated size of the mft data * attribute cannot be zero so we are ok to do this. - * ntfs_find_vcn() returns the runlist locked on success. */ - rl = ntfs_find_vcn(mft_ni, (mft_ni->allocated_size - 1) >> - vol->cluster_size_bits, TRUE); + down_write(&mft_ni->runlist.lock); + read_lock_irqsave(&mft_ni->size_lock, flags); + ll = mft_ni->allocated_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + rl = ntfs_attr_find_vcn_nolock(mft_ni, + (ll - 1) >> vol->cluster_size_bits, TRUE); if (unlikely(IS_ERR(rl) || !rl->length || rl->lcn < 0)) { + up_write(&mft_ni->runlist.lock); ntfs_error(vol->sb, "Failed to determine last allocated " "cluster of mft data attribute."); - if (!IS_ERR(rl)) { - up_write(&mft_ni->runlist.lock); + if (!IS_ERR(rl)) ret = -EIO; - } else + else ret = PTR_ERR(rl); return ret; } lcn = rl->lcn + rl->length; - ntfs_debug("Last lcn of mft data attribute is 0x%llx.", - (long long)lcn); + ntfs_debug("Last lcn of mft data attribute is 0x%llx.", (long long)lcn); /* Minimum allocation is one mft record worth of clusters. */ min_nr = vol->mft_record_size >> vol->cluster_size_bits; if (!min_nr) @@ -1721,12 +1750,13 @@ if (!nr) nr = min_nr; /* Ensure we do not go above 2^32-1 mft records. */ - if (unlikely((mft_ni->allocated_size + - (nr << vol->cluster_size_bits)) >> + read_lock_irqsave(&mft_ni->size_lock, flags); + ll = mft_ni->allocated_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + if (unlikely((ll + (nr << vol->cluster_size_bits)) >> vol->mft_record_size_bits >= (1ll << 32))) { nr = min_nr; - if (unlikely((mft_ni->allocated_size + - (nr << vol->cluster_size_bits)) >> + if (unlikely((ll + (nr << vol->cluster_size_bits)) >> vol->mft_record_size_bits >= (1ll << 32))) { ntfs_warning(vol->sb, "Cannot allocate mft record " "because the maximum number of inodes " @@ -1772,7 +1802,7 @@ return PTR_ERR(rl); } mft_ni->runlist.rl = rl; - ntfs_debug("Allocated %lli clusters.", nr); + ntfs_debug("Allocated %lli clusters.", (long long)nr); /* Find the last run in the new runlist. */ for (; rl[1].length; rl++) ; @@ -1832,7 +1862,11 @@ // moving other attributes out of this mft record. // Note: Use the special reserved mft records and ensure that // this extent is not required to find the mft record in - // question. + // question. If no free special records left we would need to + // move an existing record away, insert ours in its place, and + // then place the moved record into the newly allocated space + // and we would then need to update all references to this mft + // record appropriately. This is rather complicated... ntfs_error(vol->sb, "Not enough space in this mft record to " "accomodate extended mft data attribute " "extent. Cannot handle this yet."); @@ -1875,9 +1909,11 @@ } a = ctx->attr; } + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->allocated_size += nr << vol->cluster_size_bits; a->data.non_resident.allocated_size = cpu_to_sle64(mft_ni->allocated_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); @@ -1892,7 +1928,9 @@ CASE_SENSITIVE, rl[1].vcn, NULL, 0, ctx)) { ntfs_error(vol->sb, "Failed to find last attribute extent of " "mft data attribute.%s", es); + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->allocated_size += nr << vol->cluster_size_bits; + write_unlock_irqrestore(&mft_ni->size_lock, flags); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); up_write(&mft_ni->runlist.lock); @@ -1991,7 +2029,7 @@ "reports this as corruption, please email " "linux-ntfs-dev@lists.sourceforge.net stating " "that you saw this message and that the " - "modified file system created was corrupt. " + "modified filesystem created was corrupt. " "Thank you."); } /* Set the update sequence number to 1. */ @@ -2036,6 +2074,7 @@ */ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no) { + loff_t i_size; struct inode *mft_vi = vol->mft_ino; struct page *page; MFT_RECORD *m; @@ -2051,10 +2090,11 @@ index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; /* The maximum valid index into the page cache for $MFT's data. */ - end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; + i_size = i_size_read(mft_vi); + end_index = i_size >> PAGE_CACHE_SHIFT; if (unlikely(index >= end_index)) { if (unlikely(index > end_index || ofs + vol->mft_record_size >= - (mft_vi->i_size & ~PAGE_CACHE_MASK))) { + (i_size & ~PAGE_CACHE_MASK))) { ntfs_error(vol->sb, "Tried to format non-existing mft " "record 0x%llx.", (long long)mft_no); return -ENOENT; @@ -2188,6 +2228,7 @@ ntfs_inode *base_ni, MFT_RECORD **mrec) { s64 ll, bit, old_data_initialized, old_data_size; + unsigned long flags; struct inode *vi; struct page *page; ntfs_inode *mft_ni, *mftbmp_ni, *ni; @@ -2237,9 +2278,13 @@ * the first 24 mft records as they are special and whilst they may not * be in use, we do not allocate from them. */ + read_lock_irqsave(&mft_ni->size_lock, flags); ll = mft_ni->initialized_size >> vol->mft_record_size_bits; - if (mftbmp_ni->initialized_size << 3 > ll && - mftbmp_ni->initialized_size > 3) { + read_unlock_irqrestore(&mft_ni->size_lock, flags); + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_initialized = mftbmp_ni->initialized_size; + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + if (old_data_initialized << 3 > ll && old_data_initialized > 3) { bit = ll; if (bit < 24) bit = 24; @@ -2254,15 +2299,18 @@ * mft record that we can allocate. * Note: The smallest mft record we allocate is mft record 24. */ - bit = mftbmp_ni->initialized_size << 3; + bit = old_data_initialized << 3; if (unlikely(bit >= (1ll << 32))) goto max_err_out; + read_lock_irqsave(&mftbmp_ni->size_lock, flags); + old_data_size = mftbmp_ni->allocated_size; ntfs_debug("Status of mftbmp before extension: allocated_size 0x%llx, " "data_size 0x%llx, initialized_size 0x%llx.", - (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, - (long long)mftbmp_ni->initialized_size); - if (mftbmp_ni->initialized_size + 8 > mftbmp_ni->allocated_size) { + (long long)old_data_size, + (long long)i_size_read(vol->mftbmp_ino), + (long long)old_data_initialized); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); + if (old_data_initialized + 8 > old_data_size) { /* Need to extend bitmap by one more cluster. */ ntfs_debug("mftbmp: initialized_size + 8 > allocated_size."); err = ntfs_mft_bitmap_extend_allocation_nolock(vol); @@ -2270,12 +2318,16 @@ up_write(&vol->mftbmp_lock); goto err_out; } +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Status of mftbmp after allocation extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, + (long long)i_size_read(vol->mftbmp_ino), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ } /* * We now have sufficient allocated space, extend the initialized_size @@ -2287,12 +2339,16 @@ up_write(&vol->mftbmp_lock); goto err_out; } +#ifdef DEBUG + read_lock_irqsave(&mftbmp_ni->size_lock, flags); ntfs_debug("Status of mftbmp after initialized extention: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mftbmp_ni->allocated_size, - (long long)vol->mftbmp_ino->i_size, + (long long)i_size_read(vol->mftbmp_ino), (long long)mftbmp_ni->initialized_size); + read_unlock_irqrestore(&mftbmp_ni->size_lock, flags); +#endif /* DEBUG */ ntfs_debug("Found free record (#3), bit 0x%llx.", (long long)bit); found_free_rec: /* @bit is the found free mft record, allocate it in the mft bitmap. */ @@ -2314,7 +2370,10 @@ * parallel allocation could allocate the same mft record as this one. */ ll = (bit + 1) << vol->mft_record_size_bits; - if (ll <= mft_ni->initialized_size) { + read_lock_irqsave(&mft_ni->size_lock, flags); + old_data_initialized = mft_ni->initialized_size; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + if (ll <= old_data_initialized) { ntfs_debug("Allocated mft record already initialized."); goto mft_rec_already_initialized; } @@ -2325,26 +2384,30 @@ * actually traversed more than once when a freshly formatted volume is * first written to so it optimizes away nicely in the common case. */ + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data before extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); while (ll > mft_ni->allocated_size) { + read_unlock_irqrestore(&mft_ni->size_lock, flags); err = ntfs_mft_data_extend_allocation_nolock(vol); if (unlikely(err)) { ntfs_error(vol->sb, "Failed to extend mft data " "allocation."); goto undo_mftbmp_alloc_nolock; } + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data after allocation extension: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); } + read_unlock_irqrestore(&mft_ni->size_lock, flags); /* * Extend mft data initialized size (and data size of course) to reach * the allocated mft record, formatting the mft records allong the way. @@ -2352,6 +2415,7 @@ * needed by ntfs_mft_record_format(). We will update the attribute * record itself in one fell swoop later on. */ + write_lock_irqsave(&mft_ni->size_lock, flags); old_data_initialized = mft_ni->initialized_size; old_data_size = vol->mft_ino->i_size; while (ll > mft_ni->initialized_size) { @@ -2360,8 +2424,9 @@ new_initialized_size = mft_ni->initialized_size + vol->mft_record_size; mft_no = mft_ni->initialized_size >> vol->mft_record_size_bits; - if (new_initialized_size > vol->mft_ino->i_size) - vol->mft_ino->i_size = new_initialized_size; + if (new_initialized_size > i_size_read(vol->mft_ino)) + i_size_write(vol->mft_ino, new_initialized_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); ntfs_debug("Initializing mft record 0x%llx.", (long long)mft_no); err = ntfs_mft_record_format(vol, mft_no); @@ -2369,8 +2434,10 @@ ntfs_error(vol->sb, "Failed to format mft record."); goto undo_data_init; } + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->initialized_size = new_initialized_size; } + write_unlock_irqrestore(&mft_ni->size_lock, flags); record_formatted = TRUE; /* Update the mft data attribute record to reflect the new sizes. */ m = map_mft_record(mft_ni); @@ -2396,22 +2463,27 @@ goto undo_data_init; } a = ctx->attr; + read_lock_irqsave(&mft_ni->size_lock, flags); a->data.non_resident.initialized_size = cpu_to_sle64(mft_ni->initialized_size); - a->data.non_resident.data_size = cpu_to_sle64(vol->mft_ino->i_size); + a->data.non_resident.data_size = + cpu_to_sle64(i_size_read(vol->mft_ino)); + read_unlock_irqrestore(&mft_ni->size_lock, flags); /* Ensure the changes make it to disk. */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(mft_ni); + read_lock_irqsave(&mft_ni->size_lock, flags); ntfs_debug("Status of mft data after mft record initialization: " "allocated_size 0x%llx, data_size 0x%llx, " "initialized_size 0x%llx.", (long long)mft_ni->allocated_size, - (long long)vol->mft_ino->i_size, + (long long)i_size_read(vol->mft_ino), (long long)mft_ni->initialized_size); - BUG_ON(vol->mft_ino->i_size > mft_ni->allocated_size); - BUG_ON(mft_ni->initialized_size > vol->mft_ino->i_size); + BUG_ON(i_size_read(vol->mft_ino) > mft_ni->allocated_size); + BUG_ON(mft_ni->initialized_size > i_size_read(vol->mft_ino)); + read_unlock_irqrestore(&mft_ni->size_lock, flags); mft_rec_already_initialized: /* * We can finally drop the mft bitmap lock as the mft data attribute @@ -2652,8 +2724,10 @@ *mrec = m; return ni; undo_data_init: + write_lock_irqsave(&mft_ni->size_lock, flags); mft_ni->initialized_size = old_data_initialized; - vol->mft_ino->i_size = old_data_size; + i_size_write(vol->mft_ino, old_data_size); + write_unlock_irqrestore(&mft_ni->size_lock, flags); goto undo_mftbmp_alloc_nolock; undo_mftbmp_alloc: down_write(&vol->mftbmp_lock); Index: fs/ntfs/namei.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/namei.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/namei.c (mode:100644) @@ -153,8 +153,7 @@ ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with " "error code %li.", dent_ino, PTR_ERR(dent_inode)); - if (name) - kfree(name); + kfree(name); /* Return the error code. */ return (struct dentry *)dent_inode; } @@ -380,7 +379,7 @@ * Return the dentry of the parent directory on success or the error code on * error (IS_ERR() is true). */ -struct dentry *ntfs_get_parent(struct dentry *child_dent) +static struct dentry *ntfs_get_parent(struct dentry *child_dent) { struct inode *vi = child_dent->d_inode; ntfs_inode *ni = NTFS_I(vi); @@ -465,7 +464,7 @@ * * Return the dentry on success or the error code on error (IS_ERR() is true). */ -struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) +static struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh) { struct inode *vi; struct dentry *dent; @@ -496,3 +495,30 @@ ntfs_debug("Done for inode 0x%lx, generation 0x%x.", ino, gen); return dent; } + +/** + * Export operations allowing NFS exporting of mounted NTFS partitions. + * + * We use the default ->decode_fh() and ->encode_fh() for now. Note that they + * use 32 bits to store the inode number which is an unsigned long so on 64-bit + * architectures is usually 64 bits so it would all fail horribly on huge + * volumes. I guess we need to define our own encode and decode fh functions + * that store 64-bit inode numbers at some point but for now we will ignore the + * problem... + * + * We also use the default ->get_name() helper (used by ->decode_fh() via + * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs + * independent. + * + * The default ->get_parent() just returns -EACCES so we have to provide our + * own and the default ->get_dentry() is incompatible with NTFS due to not + * allowing the inode number 0 which is used in NTFS for the system file $MFT + * and due to using iget() whereas NTFS needs ntfs_iget(). + */ +struct export_operations ntfs_export_ops = { + .get_parent = ntfs_get_parent, /* Find the parent of a given + directory. */ + .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode + given a file handle + sub-fragment. */ +}; Index: fs/ntfs/ntfs.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/ntfs.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/ntfs.h (mode:100644) @@ -2,7 +2,7 @@ * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS * project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (C) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -31,6 +31,7 @@ #include #include #include +#include #include "types.h" #include "volume.h" @@ -41,6 +42,9 @@ NTFS_BLOCK_SIZE_BITS = 9, NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ NTFS_MAX_NAME_LEN = 255, + NTFS_MAX_ATTR_NAME_LEN = 255, + NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */ + NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE, } NTFS_CONSTANTS; /* Global variables. */ @@ -65,6 +69,8 @@ extern struct file_operations ntfs_empty_file_ops; extern struct inode_operations ntfs_empty_inode_ops; +extern struct export_operations ntfs_export_ops; + /** * NTFS_SB - return the ntfs volume given a vfs super block * @sb: VFS super block Index: fs/ntfs/runlist.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/runlist.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/runlist.c (mode:100644) @@ -1,7 +1,7 @@ /** * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -59,7 +59,7 @@ * * As the runlists grow, more memory will be required. To prevent the * kernel having to allocate and reallocate large numbers of small bits of - * memory, this function returns and entire page of memory. + * memory, this function returns an entire page of memory. * * It is up to the caller to serialize access to the runlist @rl. * @@ -113,8 +113,11 @@ BUG_ON(!dst); BUG_ON(!src); - if ((dst->lcn < 0) || (src->lcn < 0)) /* Are we merging holes? */ + if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */ + if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE) + return TRUE; return FALSE; + } if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ return FALSE; if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ @@ -855,30 +858,42 @@ if (!attr->data.non_resident.lowest_vcn) { VCN max_cluster; - max_cluster = (sle64_to_cpu( + max_cluster = ((sle64_to_cpu( attr->data.non_resident.allocated_size) + vol->cluster_size - 1) >> - vol->cluster_size_bits; + vol->cluster_size_bits) - 1; /* - * If there is a difference between the highest_vcn and the - * highest cluster, the runlist is either corrupt or, more - * likely, there are more extents following this one. + * A highest_vcn of zero means this is a single extent + * attribute so simply terminate the runlist with LCN_ENOENT). */ - if (deltaxcn < --max_cluster) { - ntfs_debug("More extents to follow; deltaxcn = 0x%llx, " - "max_cluster = 0x%llx", - (unsigned long long)deltaxcn, - (unsigned long long)max_cluster); - rl[rlpos].vcn = vcn; - vcn += rl[rlpos].length = max_cluster - deltaxcn; - rl[rlpos].lcn = LCN_RL_NOT_MAPPED; - rlpos++; - } else if (unlikely(deltaxcn > max_cluster)) { - ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = " - "0x%llx, max_cluster = 0x%llx", - (unsigned long long)deltaxcn, - (unsigned long long)max_cluster); - goto mpa_err; + if (deltaxcn) { + /* + * If there is a difference between the highest_vcn and + * the highest cluster, the runlist is either corrupt + * or, more likely, there are more extents following + * this one. + */ + if (deltaxcn < max_cluster) { + ntfs_debug("More extents to follow; deltaxcn " + "= 0x%llx, max_cluster = " + "0x%llx", + (unsigned long long)deltaxcn, + (unsigned long long) + max_cluster); + rl[rlpos].vcn = vcn; + vcn += rl[rlpos].length = max_cluster - + deltaxcn; + rl[rlpos].lcn = LCN_RL_NOT_MAPPED; + rlpos++; + } else if (unlikely(deltaxcn > max_cluster)) { + ntfs_error(vol->sb, "Corrupt attribute. " + "deltaxcn = 0x%llx, " + "max_cluster = 0x%llx", + (unsigned long long)deltaxcn, + (unsigned long long) + max_cluster); + goto mpa_err; + } } rl[rlpos].lcn = LCN_ENOENT; } else /* Not the base extent. There may be more extents to follow. */ @@ -918,17 +933,18 @@ * * It is up to the caller to serialize access to the runlist @rl. * - * Since lcns must be >= 0, we use negative return values with special meaning: + * Since lcns must be >= 0, we use negative return codes with special meaning: * - * Return value Meaning / Description + * Return code Meaning / Description * ================================================== - * -1 = LCN_HOLE Hole / not allocated on disk. - * -2 = LCN_RL_NOT_MAPPED This is part of the runlist which has not been - * inserted into the runlist yet. - * -3 = LCN_ENOENT There is no such vcn in the attribute. + * LCN_HOLE Hole / not allocated on disk. + * LCN_RL_NOT_MAPPED This is part of the runlist which has not been + * inserted into the runlist yet. + * LCN_ENOENT There is no such vcn in the attribute. * * Locking: - The caller must have locked the runlist (for reading or writing). - * - This function does not touch the lock. + * - This function does not touch the lock, nor does it modify the + * runlist. */ LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn) { @@ -964,6 +980,39 @@ return LCN_ENOENT; } +#ifdef NTFS_RW + +/** + * ntfs_rl_find_vcn_nolock - find a vcn in a runlist + * @rl: runlist to search + * @vcn: vcn to find + * + * Find the virtual cluster number @vcn in the runlist @rl and return the + * address of the runlist element containing the @vcn on success. + * + * Return NULL if @rl is NULL or @vcn is in an unmapped part/out of bounds of + * the runlist. + * + * Locking: The runlist must be locked on entry. + */ +runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, const VCN vcn) +{ + BUG_ON(vcn < 0); + if (unlikely(!rl || vcn < rl[0].vcn)) + return NULL; + while (likely(rl->length)) { + if (unlikely(vcn < rl[1].vcn)) { + if (likely(rl->lcn >= LCN_HOLE)) + return rl; + return NULL; + } + rl++; + } + if (likely(rl->lcn == LCN_ENOENT)) + return rl; + return NULL; +} + /** * ntfs_get_nr_significant_bytes - get number of bytes needed to store a number * @n: number for which to get the number of bytes for @@ -1436,3 +1485,5 @@ ntfs_debug("Done."); return 0; } + +#endif /* NTFS_RW */ Index: fs/ntfs/runlist.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/runlist.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/runlist.h (mode:100644) @@ -2,7 +2,7 @@ * runlist.h - Defines for runlist handling in NTFS Linux kernel driver. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -66,6 +66,8 @@ LCN_HOLE = -1, /* Keep this as highest value or die! */ LCN_RL_NOT_MAPPED = -2, LCN_ENOENT = -3, + LCN_ENOMEM = -4, + LCN_EIO = -5, } LCN_SPECIAL_VALUES; extern runlist_element *ntfs_runlists_merge(runlist_element *drl, @@ -76,6 +78,11 @@ extern LCN ntfs_rl_vcn_to_lcn(const runlist_element *rl, const VCN vcn); +#ifdef NTFS_RW + +extern runlist_element *ntfs_rl_find_vcn_nolock(runlist_element *rl, + const VCN vcn); + extern int ntfs_get_size_for_mapping_pairs(const ntfs_volume *vol, const runlist_element *rl, const VCN start_vcn); @@ -86,4 +93,6 @@ extern int ntfs_rl_truncate_nolock(const ntfs_volume *vol, runlist *const runlist, const s64 new_length); +#endif /* NTFS_RW */ + #endif /* _LINUX_NTFS_RUNLIST_H */ Index: fs/ntfs/super.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/super.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/super.c (mode:100644) @@ -1,7 +1,7 @@ /* * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2001,2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -38,10 +38,11 @@ #include "debug.h" #include "index.h" #include "aops.h" +#include "layout.h" #include "malloc.h" #include "ntfs.h" -/* Number of mounted file systems which have compression enabled. */ +/* Number of mounted filesystems which have compression enabled. */ static unsigned long ntfs_nr_compression_users; /* A global default upcase table and a corresponding reference count. */ @@ -102,7 +103,7 @@ gid_t gid = (gid_t)-1; mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; int mft_zone_multiplier = -1, on_errors = -1; - int show_sys_files = -1, case_sensitive = -1; + int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; struct nls_table *nls_map = NULL, *old_nls; /* I am lazy... (-8 */ @@ -162,6 +163,7 @@ else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) else NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) + else NTFS_GETOPT_BOOL("disable_sparse", disable_sparse) else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, on_errors_arr) else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) @@ -291,6 +293,21 @@ else NVolClearCaseSensitive(vol); } + if (disable_sparse != -1) { + if (disable_sparse) + NVolClearSparseEnabled(vol); + else { + if (!NVolSparseEnabled(vol) && + vol->major_ver && vol->major_ver < 3) + ntfs_warning(vol->sb, "Not enabling sparse " + "support due to NTFS volume " + "version %i.%i (need at least " + "version 3.0).", vol->major_ver, + vol->minor_ver); + else + NVolSetSparseEnabled(vol); + } + } return TRUE; needs_arg: ntfs_error(vol->sb, "The %s option requires an argument.", p); @@ -516,16 +533,19 @@ { /* * Check that checksum == sum of u32 values from b to the checksum - * field. If checksum is zero, no checking is done. + * field. If checksum is zero, no checking is done. We will work when + * the checksum test fails, since some utilities update the boot sector + * ignoring the checksum which leaves the checksum out-of-date. We + * report a warning if this is the case. */ - if ((void*)b < (void*)&b->checksum && b->checksum) { + if ((void*)b < (void*)&b->checksum && b->checksum && !silent) { le32 *u; u32 i; for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u) i += le32_to_cpup(u); if (le32_to_cpu(b->checksum) != i) - goto not_ntfs; + ntfs_warning(sb, "Invalid boot sector checksum."); } /* Check OEMidentifier is "NTFS " */ if (b->oem_id != magicNTFS) @@ -541,9 +561,9 @@ default: goto not_ntfs; } - /* Check the cluster size is not above 65536 bytes. */ + /* Check the cluster size is not above the maximum (64kiB). */ if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * - b->bpb.sectors_per_cluster > 0x10000) + b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE) goto not_ntfs; /* Check reserved/unused fields are really zero. */ if (le16_to_cpu(b->bpb.reserved_sectors) || @@ -575,7 +595,7 @@ * many BIOSes will refuse to boot from a bootsector if the magic is * incorrect, so we emit a warning. */ - if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) + if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) ntfs_warning(sb, "Invalid end of sector marker."); return TRUE; not_ntfs: @@ -967,6 +987,7 @@ tmp_ni = NTFS_I(tmp_ino); /* The $MFTMirr, like the $MFT is multi sector transfer protected. */ NInoSetMstProtected(tmp_ni); + NInoSetSparseDisabled(tmp_ni); /* * Set up our little cheat allowing us to reuse the async read io * completion handler for directories. @@ -990,12 +1011,12 @@ */ static BOOL check_mft_mirror(ntfs_volume *vol) { - unsigned long index; struct super_block *sb = vol->sb; ntfs_inode *mirr_ni; struct page *mft_page, *mirr_page; u8 *kmft, *kmirr; runlist_element *rl, rl2[2]; + pgoff_t index; int mrecs_per_page, i; ntfs_debug("Entering."); @@ -1122,6 +1143,7 @@ /* ntfs_check_logfile() will have displayed error output. */ return FALSE; } + NInoSetSparseDisabled(NTFS_I(tmp_ino)); vol->logfile_ino = tmp_ino; ntfs_debug("Done."); return TRUE; @@ -1175,8 +1197,7 @@ return FALSE; } /* We do not care for the type of match that was found. */ - if (name) - kfree(name); + kfree(name); /* Get the inode. */ tmp_ino = ntfs_iget(vol->sb, MREF(mref)); if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { @@ -1205,10 +1226,11 @@ */ static BOOL load_and_init_attrdef(ntfs_volume *vol) { + loff_t i_size; struct super_block *sb = vol->sb; struct inode *ino; struct page *page; - unsigned long index, max_index; + pgoff_t index, max_index; unsigned int size; ntfs_debug("Entering."); @@ -1219,14 +1241,16 @@ iput(ino); goto failed; } + NInoSetSparseDisabled(NTFS_I(ino)); /* The size of FILE_AttrDef must be above 0 and fit inside 31 bits. */ - if (!ino->i_size || ino->i_size > 0x7fffffff) + i_size = i_size_read(ino); + if (i_size <= 0 || i_size > 0x7fffffff) goto iput_failed; - vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(ino->i_size); + vol->attrdef = (ATTR_DEF*)ntfs_malloc_nofs(i_size); if (!vol->attrdef) goto iput_failed; index = 0; - max_index = ino->i_size >> PAGE_CACHE_SHIFT; + max_index = i_size >> PAGE_CACHE_SHIFT; size = PAGE_CACHE_SIZE; while (index < max_index) { /* Read the attrdef table and copy it into the linear buffer. */ @@ -1239,12 +1263,12 @@ ntfs_unmap_page(page); }; if (size == PAGE_CACHE_SIZE) { - size = ino->i_size & ~PAGE_CACHE_MASK; + size = i_size & ~PAGE_CACHE_MASK; if (size) goto read_partial_attrdef_page; } - vol->attrdef_size = ino->i_size; - ntfs_debug("Read %llu bytes from $AttrDef.", ino->i_size); + vol->attrdef_size = i_size; + ntfs_debug("Read %llu bytes from $AttrDef.", i_size); iput(ino); return TRUE; free_iput_failed: @@ -1267,10 +1291,11 @@ */ static BOOL load_and_init_upcase(ntfs_volume *vol) { + loff_t i_size; struct super_block *sb = vol->sb; struct inode *ino; struct page *page; - unsigned long index, max_index; + pgoff_t index, max_index; unsigned int size; int i, max; @@ -1286,14 +1311,15 @@ * The upcase size must not be above 64k Unicode characters, must not * be zero and must be a multiple of sizeof(ntfschar). */ - if (!ino->i_size || ino->i_size & (sizeof(ntfschar) - 1) || - ino->i_size > 64ULL * 1024 * sizeof(ntfschar)) + i_size = i_size_read(ino); + if (!i_size || i_size & (sizeof(ntfschar) - 1) || + i_size > 64ULL * 1024 * sizeof(ntfschar)) goto iput_upcase_failed; - vol->upcase = (ntfschar*)ntfs_malloc_nofs(ino->i_size); + vol->upcase = (ntfschar*)ntfs_malloc_nofs(i_size); if (!vol->upcase) goto iput_upcase_failed; index = 0; - max_index = ino->i_size >> PAGE_CACHE_SHIFT; + max_index = i_size >> PAGE_CACHE_SHIFT; size = PAGE_CACHE_SIZE; while (index < max_index) { /* Read the upcase table and copy it into the linear buffer. */ @@ -1306,13 +1332,13 @@ ntfs_unmap_page(page); }; if (size == PAGE_CACHE_SIZE) { - size = ino->i_size & ~PAGE_CACHE_MASK; + size = i_size & ~PAGE_CACHE_MASK; if (size) goto read_partial_upcase_page; } - vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; + vol->upcase_len = i_size >> UCHAR_T_SIZE_BITS; ntfs_debug("Read %llu bytes from $UpCase (expected %zu bytes).", - ino->i_size, 64 * 1024 * sizeof(ntfschar)); + i_size, 64 * 1024 * sizeof(ntfschar)); iput(ino); down(&ntfs_lock); if (!default_upcase) { @@ -1435,7 +1461,8 @@ iput(vol->lcnbmp_ino); goto bitmap_failed; } - if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { + NInoSetSparseDisabled(NTFS_I(vol->lcnbmp_ino)); + if ((vol->nr_clusters + 7) >> 3 > i_size_read(vol->lcnbmp_ino)) { iput(vol->lcnbmp_ino); bitmap_failed: ntfs_error(sb, "Failed to load $Bitmap."); @@ -1486,6 +1513,12 @@ unmap_mft_record(NTFS_I(vol->vol_ino)); printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, vol->minor_ver); + if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { + ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " + "volume version %i.%i (need at least version " + "3.0).", vol->major_ver, vol->minor_ver); + NVolClearSparseEnabled(vol); + } #ifdef NTFS_RW /* Make sure that no unsupported volume flags are set. */ if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { @@ -1959,8 +1992,7 @@ struct address_space *mapping = vol->lcnbmp_ino->i_mapping; filler_t *readpage = (filler_t*)mapping->a_ops->readpage; struct page *page; - unsigned long index, max_index; - unsigned int max_size; + pgoff_t index, max_index; ntfs_debug("Entering."); /* Serialize accesses to the cluster bitmap. */ @@ -1972,11 +2004,10 @@ */ max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - /* Use multiples of 4 bytes. */ - max_size = PAGE_CACHE_SIZE >> 2; - ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", - max_index, max_size); - for (index = 0UL; index < max_index; index++) { + /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */ + ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.", + max_index, PAGE_CACHE_SIZE / 4); + for (index = 0; index < max_index; index++) { unsigned int i; /* * Read the page from page cache, getting it from backing store @@ -2008,7 +2039,7 @@ * the result as all out of range bytes are set to zero by * ntfs_readpage(). */ - for (i = 0; i < max_size; i++) + for (i = 0; i < PAGE_CACHE_SIZE / 4; i++) nr_free -= (s64)hweight32(kaddr[i]); kunmap_atomic(kaddr, KM_USER0); page_cache_release(page); @@ -2031,6 +2062,8 @@ /** * __get_nr_free_mft_records - return the number of free inodes on a volume * @vol: ntfs volume for which to obtain free inode count + * @nr_free: number of mft records in filesystem + * @max_index: maximum number of pages containing set bits * * Calculate the number of free mft records (inodes) on the mounted NTFS * volume @vol. We actually calculate the number of mft records in use instead @@ -2043,32 +2076,20 @@ * * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. */ -static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) +static unsigned long __get_nr_free_mft_records(ntfs_volume *vol, + s64 nr_free, const pgoff_t max_index) { - s64 nr_free; u32 *kaddr; struct address_space *mapping = vol->mftbmp_ino->i_mapping; filler_t *readpage = (filler_t*)mapping->a_ops->readpage; struct page *page; - unsigned long index, max_index; - unsigned int max_size; + pgoff_t index; ntfs_debug("Entering."); - /* Number of mft records in file system (at this point in time). */ - nr_free = vol->mft_ino->i_size >> vol->mft_record_size_bits; - /* - * Convert the maximum number of set bits into bytes rounded up, then - * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we - * have one full and one partial page max_index = 2. - */ - max_index = ((((NTFS_I(vol->mft_ino)->initialized_size >> - vol->mft_record_size_bits) + 7) >> 3) + - PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - /* Use multiples of 4 bytes. */ - max_size = PAGE_CACHE_SIZE >> 2; + /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */ ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " - "0x%x.", max_index, max_size); - for (index = 0UL; index < max_index; index++) { + "0x%lx.", max_index, PAGE_CACHE_SIZE / 4); + for (index = 0; index < max_index; index++) { unsigned int i; /* * Read the page from page cache, getting it from backing store @@ -2100,7 +2121,7 @@ * the result as all out of range bytes are set to zero by * ntfs_readpage(). */ - for (i = 0; i < max_size; i++) + for (i = 0; i < PAGE_CACHE_SIZE / 4; i++) nr_free -= (s64)hweight32(kaddr[i]); kunmap_atomic(kaddr, KM_USER0); page_cache_release(page); @@ -2134,8 +2155,11 @@ */ static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) { - ntfs_volume *vol = NTFS_SB(sb); s64 size; + ntfs_volume *vol = NTFS_SB(sb); + ntfs_inode *mft_ni = NTFS_I(vol->mft_ino); + pgoff_t max_index; + unsigned long flags; ntfs_debug("Entering."); /* Type of filesystem. */ @@ -2143,13 +2167,13 @@ /* Optimal transfer block size. */ sfs->f_bsize = PAGE_CACHE_SIZE; /* - * Total data blocks in file system in units of f_bsize and since + * Total data blocks in filesystem in units of f_bsize and since * inodes are also stored in data blocs ($MFT is a file) this is just * the total clusters. */ sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; - /* Free data blocks in file system in units of f_bsize. */ + /* Free data blocks in filesystem in units of f_bsize. */ size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; if (size < 0LL) @@ -2158,17 +2182,27 @@ sfs->f_bavail = sfs->f_bfree = size; /* Serialize accesses to the inode bitmap. */ down_read(&vol->mftbmp_lock); - /* Number of inodes in file system (at this point in time). */ - sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; + read_lock_irqsave(&mft_ni->size_lock, flags); + size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits; + /* + * Convert the maximum number of set bits into bytes rounded up, then + * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we + * have one full and one partial page max_index = 2. + */ + max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits) + + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + read_unlock_irqrestore(&mft_ni->size_lock, flags); + /* Number of inodes in filesystem (at this point in time). */ + sfs->f_files = size; /* Free inodes in fs (based on current total count). */ - sfs->f_ffree = __get_nr_free_mft_records(vol); + sfs->f_ffree = __get_nr_free_mft_records(vol, size, max_index); up_read(&vol->mftbmp_lock); /* * File system id. This is extremely *nix flavour dependent and even * within Linux itself all fs do their own thing. I interpret this to * mean a unique id associated with the mounted fs and not the id - * associated with the file system driver, the latter is already given - * by the file system type in sfs->f_type. Thus we use the 64-bit + * associated with the filesystem driver, the latter is already given + * by the filesystem type in sfs->f_type. Thus we use the 64-bit * volume serial number splitting it into two 32-bit parts. We enter * the least significant 32-bits in f_fsid[0] and the most significant * 32-bits in f_fsid[1]. @@ -2219,53 +2253,19 @@ proc. */ }; - -/** - * Declarations for NTFS specific export operations (fs/ntfs/namei.c). - */ -extern struct dentry *ntfs_get_parent(struct dentry *child_dent); -extern struct dentry *ntfs_get_dentry(struct super_block *sb, void *fh); - -/** - * Export operations allowing NFS exporting of mounted NTFS partitions. - * - * We use the default ->decode_fh() and ->encode_fh() for now. Note that they - * use 32 bits to store the inode number which is an unsigned long so on 64-bit - * architectures is usually 64 bits so it would all fail horribly on huge - * volumes. I guess we need to define our own encode and decode fh functions - * that store 64-bit inode numbers at some point but for now we will ignore the - * problem... - * - * We also use the default ->get_name() helper (used by ->decode_fh() via - * fs/exportfs/expfs.c::find_exported_dentry()) as that is completely fs - * independent. - * - * The default ->get_parent() just returns -EACCES so we have to provide our - * own and the default ->get_dentry() is incompatible with NTFS due to not - * allowing the inode number 0 which is used in NTFS for the system file $MFT - * and due to using iget() whereas NTFS needs ntfs_iget(). - */ -static struct export_operations ntfs_export_ops = { - .get_parent = ntfs_get_parent, /* Find the parent of a given - directory. */ - .get_dentry = ntfs_get_dentry, /* Find a dentry for the inode - given a file handle - sub-fragment. */ -}; - /** - * ntfs_fill_super - mount an ntfs files system - * @sb: super block of ntfs file system to mount + * ntfs_fill_super - mount an ntfs filesystem + * @sb: super block of ntfs filesystem to mount * @opt: string containing the mount options * @silent: silence error output * * ntfs_fill_super() is called by the VFS to mount the device described by @sb - * with the mount otions in @data with the NTFS file system. + * with the mount otions in @data with the NTFS filesystem. * * If @silent is true, remain silent even if errors are detected. This is used - * during bootup, when the kernel tries to mount the root file system with all - * registered file systems one after the other until one succeeds. This implies - * that all file systems except the correct one will quite correctly and + * during bootup, when the kernel tries to mount the root filesystem with all + * registered filesystems one after the other until one succeeds. This implies + * that all filesystems except the correct one will quite correctly and * expectedly return an error, but nobody wants to see error messages when in * fact this is what is supposed to happen. * @@ -2325,6 +2325,9 @@ unlock_kernel(); + /* By default, enable sparse support. */ + NVolSetSparseEnabled(vol); + /* Important to get the mount options dealt with now. */ if (!parse_options(vol, (char*)opt)) goto err_out_now; @@ -2347,7 +2350,8 @@ } /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ - vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; + vol->nr_blocks = i_size_read(sb->s_bdev->bd_inode) >> + NTFS_BLOCK_SIZE_BITS; /* Read the boot sector and return unlocked buffer head to it. */ if (!(bh = read_ntfs_boot_sector(sb, silent))) { @@ -2581,7 +2585,7 @@ */ kmem_cache_t *ntfs_name_cache; -/* Slab caches for efficient allocation/deallocation of of inodes. */ +/* Slab caches for efficient allocation/deallocation of inodes. */ kmem_cache_t *ntfs_inode_cache; kmem_cache_t *ntfs_big_inode_cache; @@ -2705,7 +2709,7 @@ ntfs_debug("NTFS driver registered successfully."); return 0; /* Success! */ } - printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); + printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); sysctl_err_out: kmem_cache_destroy(ntfs_big_inode_cache); @@ -2719,7 +2723,7 @@ kmem_cache_destroy(ntfs_index_ctx_cache); ictx_err_out: if (!err) { - printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " + printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " "registration...\n"); err = -ENOMEM; } @@ -2759,7 +2763,7 @@ } MODULE_AUTHOR("Anton Altaparmakov "); -MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2004 Anton Altaparmakov"); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2005 Anton Altaparmakov"); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); #ifdef DEBUG Index: fs/ntfs/sysctl.c =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/sysctl.c (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/sysctl.c (mode:100644) @@ -3,7 +3,7 @@ * the Linux-NTFS project. Adapted from the old NTFS driver, * Copyright (C) 1997 Martin von Löwis, Régis Duchesne * - * Copyright (c) 2002-2004 Anton Altaparmakov + * Copyright (c) 2002-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -67,7 +67,7 @@ return -ENOMEM; #ifdef CONFIG_PROC_FS /* - * If the proc file system is in use and we are a module, need + * If the proc filesystem is in use and we are a module, need * to set the owner of our proc entry to our module. In the * non-modular case, THIS_MODULE is NULL, so this is ok. */ Index: fs/ntfs/time.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/time.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/time.h (mode:100644) @@ -1,7 +1,7 @@ /* * time.h - NTFS time conversion functions. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -87,7 +87,7 @@ struct timespec ts; /* Subtract the NTFS time offset. */ - s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; + u64 t = (u64)(sle64_to_cpu(time) - NTFS_TIME_OFFSET); /* * Convert the time to 1-second intervals and the remainder to * 1-nano-second intervals. Index: fs/ntfs/volume.h =================================================================== --- 95866d31faa6db4ec786399296238344c7cfea0c/fs/ntfs/volume.h (mode:100644) +++ e9f8a061e958579a6de1d3ee133fd30ca4139f9c/fs/ntfs/volume.h (mode:100644) @@ -2,7 +2,7 @@ * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part * of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -54,7 +54,7 @@ mode_t dmask; /* The mask for directory permissions. */ u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ - u8 on_errors; /* What to do on file system errors. */ + u8 on_errors; /* What to do on filesystem errors. */ /* NTFS bootsector provided information. */ u16 sector_size; /* in bytes */ u8 sector_size_bits; /* log2(sector_size) */ @@ -141,6 +141,7 @@ file names in WIN32 namespace. */ NV_LogFileEmpty, /* 1: $LogFile journal is empty. */ NV_QuotaOutOfDate, /* 1: $Quota is out of date. */ + NV_SparseEnabled, /* 1: May create sparse files. */ } ntfs_volume_flags; /* @@ -167,5 +168,6 @@ NVOL_FNS(CaseSensitive) NVOL_FNS(LogFileEmpty) NVOL_FNS(QuotaOutOfDate) +NVOL_FNS(SparseEnabled) #endif /* _LINUX_NTFS_VOLUME_H */