patch-2.1.74 linux/fs/ntfs/util.c

Next file: linux/fs/ntfs/util.h
Previous file: linux/fs/ntfs/types.h
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.73/linux/fs/ntfs/util.c linux/fs/ntfs/util.c
@@ -0,0 +1,343 @@
+/*
+ *  util.c
+ *  Miscellaneous support
+ *
+ *  Copyright (C) 1997 Martin von Löwis
+ *  Copyright (C) 1997 Régis Duchesne
+ *
+ *  The utf8 routines are copied from the Python wstrop module.
+ */
+
+#include "types.h"
+#include "struct.h"
+#include "util.h"
+
+#include <errno.h>
+/* FreeBSD doesn't seem to have EILSEQ in errno.h */
+#ifndef EILSEQ
+# define EILSEQ	EINVAL
+#endif
+#include "support.h"
+
+/* Encodes one wide character as UTF-8.
+ * buf may be 0; then nothing is stored and only the length is computed.
+ * Otherwise buf must have room for up to 3 bytes.
+ * Returns the number of bytes (1 to 3), or 0 if c cannot be encoded.
+ */
+static int
+to_utf8(ntfs_u16 c,unsigned char* buf)
+{
+	int n;
+
+	if(c==0 || c>=0x10000)
+		return 0; /* no embedded 0 runes, nothing above 0xFFFF in NTFS */
+	n = c<0x80 ? 1 : c<0x800 ? 2 : 3;
+	if(!buf)
+		return n;
+	switch(n){
+	case 1:
+		buf[0] = c;
+		break;
+	case 2:
+		buf[0] = 0xc0 | (c>>6);
+		buf[1] = 0x80 | (c & 0x3f);
+		break;
+	default:
+		buf[0] = 0xe0 | (c>>12);
+		buf[1] = 0x80 | ((c>>6) & 0x3f);
+		buf[2] = 0x80 | (c & 0x3f);
+	}
+	return n;
+}
+
+/* Decodes one UTF-8 sequence at str into the 16-bit character *c.
+ * Returns the number of bytes consumed (1 to 3), or 0 on error: a stray or
+ * malformed continuation byte, an overlong form, or a 4-byte sequence, whose
+ * character would lie above 0xFFFF. *c is written only on success. Reading
+ * stops at the first non-continuation byte, e.g. a terminating 0.
+ */
+static int
+from_utf8(const unsigned char* str,ntfs_u16 *c)
+{
+	unsigned int wc,min; /* value so far; smallest value needing l bytes */
+	int l,i;
+
+	if(*str<0x80){
+		*c = *str;
+		return 1;
+	}
+	if(*str<0xc0) /* lead byte must not be 10xxxxxx */
+		return 0;
+	if(*str<0xe0){         /* 110xxxxx */
+		wc = *str & 0x1f;
+		l=2; min=0x80;
+	}else if(*str<0xf0){   /* 1110xxxx */
+		wc = *str & 0xf;
+		l=3; min=0x800;
+	}else /* 11110xxx and up: above 0xFFFF, would be truncated in *c */
+		return 0;
+	for(i=1;i<l;i++){
+		if((str[i] & 0xc0) != 0x80) /* must be 10xxxxxx */
+			return 0;
+		wc = (wc<<6) | (str[i] & 0x3f);
+	}
+	if(wc<min) /* overlong: a shorter encoding exists, so refuse this one */
+		return 0;
+	*c = wc;
+	return l;
+}
+
+/* Builds a freshly allocated, 0-terminated UTF-8 copy of the in_len wide
+ * characters at in. On success returns 0 and stores the buffer in *out and
+ * its length, terminator excluded, in *out_len; the caller frees *out.
+ * Returns EILSEQ for a character to_utf8 rejects, ENOMEM if allocation fails.
+ * There is no support for UTF-16, yet
+ */
+static int ntfs_dupuni2utf8(ntfs_u16* in, int in_len,char **out,int *out_len)
+{
+	int pos,n,total=0;
+	unsigned char *buf;
+
+	ntfs_debug(DEBUG_OTHER,"converting l=%d\n",in_len);
+	/* first pass: size the output without storing anything */
+	for(pos=0;pos<in_len;pos++){
+		n=to_utf8(in[pos],0);
+		if(n==0)
+			return EILSEQ; /* invalid character */
+		total+=n;
+	}
+	buf=ntfs_malloc(total+1); /* one extra byte for the terminator */
+	*out=buf;
+	if(!buf)
+		return ENOMEM;
+	buf[total]='\0';
+	*out_len=total;
+	/* second pass: n is now the write offset into buf */
+	for(pos=0,n=0;pos<in_len;pos++)
+		n+=to_utf8(in[pos],buf+n);
+	return 0;
+}
+
+/* Converts in_len bytes of UTF-8 to a 0-terminated wide string; conventions
+ * as for ntfs_dupuni2utf8: returns 0 with *out (caller frees) and *out_len
+ * set, else EILSEQ (malformed input, or a sequence cut off by in_len) or ENOMEM
+ */
+static int ntfs_duputf82uni(unsigned char* in, int in_len,ntfs_u16** out,int *out_len)
+{
+	int i,tmp,need,len16;
+	ntfs_u16 wtmp,*result;
+
+	for(i=len16=0;i<in_len;i+=tmp,len16++){
+		need = in[i]<0xc0 ? 1 : in[i]<0xe0 ? 2 : in[i]<0xf0 ? 3 : 4; /* per lead byte */
+		tmp = need>in_len-i ? 0 : from_utf8(in+i,&wtmp); /* never decode past in_len */
+		if(!tmp)
+			return EILSEQ;
+	}
+	*out=result=ntfs_malloc(2*(len16+1));
+	if(!result)
+		return ENOMEM;
+	result[len16]=0;
+	*out_len=len16;
+	for(i=len16=0;i<in_len;i+=tmp,len16++)
+		tmp=from_utf8(in+i,result+len16); /* validated by the first pass */
+	return 0;
+}
+
+/* See above. Produces a 0-terminated ISO-8859-1 copy of a wide string;
+ * returns EILSEQ if any character is 256 or above, ENOMEM if out of memory */
+static int ntfs_dupuni288591(ntfs_u16* in,int in_len,char** out,int *out_len)
+{
+	char *copy;
+	int pos;
+
+	for(pos=0;pos<in_len;pos++)
+		if(in[pos]>0xff)
+			return EILSEQ; /* does not fit in a single byte */
+	*out=copy=ntfs_malloc(in_len+1);
+	if(!copy)
+		return ENOMEM;
+	for(pos=0;pos<in_len;pos++)
+		copy[pos]=in[pos];
+	copy[in_len]='\0';
+	*out_len=in_len;
+	return 0;
+}
+
+/* See above: ISO-8859-1 to wide string, 0-terminated like ntfs_duputf82uni */
+static int ntfs_dup885912uni(unsigned char* in,int in_len,ntfs_u16 **out,int *out_len)
+{
+	int i;
+	ntfs_u16* result;
+	*out=result=ntfs_malloc(2*(in_len+1)); /* +1: terminator, never size 0 */
+	if(!result)
+		return ENOMEM;
+	result[in_len]=0;
+	*out_len=in_len; /* the terminator is not counted */
+	for(i=0;i<in_len;i++)
+		result[i]=in[i];
+	return 0;
+}
+
+/* Encodings dispatcher: converts a wide string with the first converter that
+ * vol->nct selects, tested in the order utf8, iso8859_1, map/uni_xlate */
+int ntfs_encodeuni(ntfs_volume *vol,ntfs_u16 *in, int in_len,
+		   char **out, int *out_len)
+{
+	if(vol->nct & nct_utf8)
+		return ntfs_dupuni2utf8(in,in_len,out,out_len);
+	if(vol->nct & nct_iso8859_1)
+		return ntfs_dupuni288591(in,in_len,out,out_len);
+	/* uni_xlate is handled inside map */
+	if(vol->nct & (nct_map|nct_uni_xlate))
+		return ntfs_dupuni2map(vol,in,in_len,out,out_len);
+	return EINVAL; /* unknown encoding */
+}
+
+/* Decoding dispatcher: same selection on vol->nct as ntfs_encodeuni */
+int ntfs_decodeuni(ntfs_volume *vol,char *in, int in_len,
+		   ntfs_u16 **out, int *out_len)
+{
+	if(vol->nct & nct_utf8)
+		return ntfs_duputf82uni(in,in_len,out,out_len);
+	if(vol->nct & nct_iso8859_1)
+		return ntfs_dup885912uni(in,in_len,out,out_len);
+	if(vol->nct & (nct_map | nct_uni_xlate))
+		return ntfs_dupmap2uni(vol,in,in_len,out,out_len);
+	return EINVAL; /* no known encoding selected */
+}
+
+/* Same address space copies: store n bytes from src at dest->param */
+void ntfs_put(ntfs_io *dest,void *src,ntfs_size_t n)
+{
+	ntfs_memcpy(dest->param,src,n);   /* write at the current position */
+	dest->param = dest->param + n;    /* then move it past the copy */
+}
+
+void ntfs_get(void* dest,ntfs_io *src,ntfs_size_t n)
+{
+	ntfs_memcpy(dest,src->param,n);   /* read n bytes at src->param */
+	src->param = src->param + n;      /* then move it past them */
+}
+
+void *ntfs_calloc(int size)
+{
+	void *mem=ntfs_malloc(size);	/* size bytes from ntfs_malloc */
+	if(!mem)
+		return mem;		/* allocation failed: nothing to clear */
+	ntfs_bzero(mem,size);		/* hand the block back zero-filled */
+	return mem;
+}
+
+#if 0
+/* keep the first byte of each of len two-byte characters in from; adds a 0 */
+void ntfs_uni2ascii(char *to,char *from,int len)
+{
+	int k;
+
+	for(k=0;k<len;k++,from+=2)
+		to[k]=*from;
+	to[k]='\0';
+}
+#endif
+
+/* copy len ASCII characters from from to to, one per element, then append 0 */
+void ntfs_ascii2uni(short int *to,char *from,int len)
+{
+	int i;
+
+	for(i=0;i<len;i++)
+		to[i]=(unsigned char)from[i]; /* bytes >=0x80 must not sign-extend */
+	to[i]=0;
+}
+
+/* strncmp for Unicode strings: compares the first n characters of a and b
+ * as unsigned values. Returns -1, 0 or 1; a 0 character does not stop it. */
+int ntfs_uni_strncmp(short int* a,short int *b,int n)
+{
+	int i;
+
+	for(i=0;i<n;i++){
+		/* as signed shorts, characters from 0x8000 up would sort first */
+		unsigned short ca=a[i],cb=b[i];
+		if(ca!=cb)
+			return ca<cb ? -1 : 1;
+	}
+	return 0;
+}
+
+/* strncmp between Unicode and ASCII strings: compares the first n characters
+ * of a with those of b. Returns -1, 0 or 1; a 0 character does not stop it. */
+int ntfs_ua_strncmp(short int* a,char* b,int n)
+{
+	int k,d;
+
+	for(k=0;k<n;k++){
+		/* both operands promote to int, so the difference is exact */
+		d=a[k]-b[k];
+		if(d!=0)
+			return d<0 ? -1 : 1;
+	}
+	return 0;
+}
+
+/* Convert the NT UTC (based 1.1.1601, in hundred nanosecond units)
+ * into Unix UTC (based 1.1.1970, in seconds), dropping the sub-second part
+ */
+ntfs_time_t ntfs_ntutc2unixutc(ntfs_time64_t ntutc)
+{
+/*
+ * This is very gross because
+ * 1: We must do 64-bit division on a 32-bit machine
+ * 2: We can't use libgcc for long long operations in the kernel
+ * 3: Floating point math in the kernel would corrupt user data
+ */
+	const unsigned int D = 10000000;	/* 100 ns units per second */
+	unsigned int H = (ntutc >> 32);	/* upper half of the time stamp */
+	unsigned int L = (unsigned int)ntutc;	/* lower half; assumes 32-bit int */
+	unsigned int numerator2;	/* dividend of the current step */
+	unsigned int lowseconds;	/* quotient, built 8 bits per step */
+	unsigned int result;		/* partial quotient of the current step */
+
+	/* It is best to subtract 0x019db1ded53e8000 first. */
+	/* Then the 1601-based date becomes a 1970-based date. */
+	if(L < (unsigned)0xd53e8000) H--;	/* borrow for the low-word subtraction */
+	L -= (unsigned)0xd53e8000;
+	H -= (unsigned)0x019db1de;
+
+	/*
+	 * Long division in base 256 using 32-bit arithmetic only :-)
+	 * With the subtraction already done, the result fits in 32 bits.
+	 * The numerator fits in 56 bits and the denominator in 24 bits, so
+	 * every remainder, shifted by 8 for the next byte of L, fits too.
+	 */
+
+	numerator2  = (H<<8) | (L>>24);
+	result      = (numerator2 / D);   /* shifted 24 right!! */
+	lowseconds  = result << 24;
+
+	numerator2  = ((numerator2-result*D)<<8) | ((L>>16)&0xff);
+	result      = (numerator2 / D);   /* shifted 16 right!! */
+	lowseconds |= result << 16;
+
+	numerator2  = ((numerator2-result*D)<<8) | ((L>>8)&0xff);
+	result      = (numerator2 / D);   /* shifted 8 right!! */
+	lowseconds |= result << 8;
+
+	numerator2  = ((numerator2-result*D)<<8) | (L&0xff);
+	result      = (numerator2 / D);   /* not shifted */
+	lowseconds |= result;
+
+	return lowseconds;
+}
+
+/* Unix UTC (seconds, based 1.1.1970) to NT UTC (100 ns units, based 1.1.1601) */
+ntfs_time64_t ntfs_unixutc2ntutc(ntfs_time_t t)
+{
+	return 10000000 * ((ntfs_time64_t)(369*365+89)*24*3600 + t);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov