Code Coverage Statistics for Source File

c:\Tools\SD3\src\Libraries\ICSharpCode.TextEditor\Project\Src\Util\FileReader.cs

Sequence Point Coverage
N/A
0 of 0
Branch Coverage
N/A
0 of 0
Lines
152
Highlight: Uncovered Code Covered Code
L V Source
1
// <file>
2
//     <copyright see="prj:///doc/copyright.txt"/>
3
//     <license see="prj:///doc/license.txt"/>
4
//     <owner name="Daniel Grunwald" email="daniel@danielgrunwald.de"/>
5
//     <version>$Revision: 2682 $</version>
6
// </file>
7
8
using System;
9
using System.IO;
10
using System.Text;
11
12
namespace ICSharpCode.TextEditor.Util
13
{
14
	/// <summary>
	/// Class that can open text files with auto-detection of the encoding.
	/// </summary>
	/// <remarks>
	/// Streams that start with a byte order mark are handed to <see cref="StreamReader"/>,
	/// whose built-in detection handles them. Streams without a BOM are scanned to decide
	/// whether their content is valid UTF-8; if not, the caller-supplied default encoding
	/// is used.
	/// </remarks>
	public static class FileReader
	{
		/// <summary>
		/// Maximum number of bytes inspected when checking a stream for valid UTF-8 (500 KB).
		/// </summary>
		const int MaxBytesToScan = 500000;
		
		/// <summary>
		/// Gets whether <paramref name="encoding"/> is one of the Unicode transformation
		/// formats (UTF-7, UTF-8, UTF-16 LE/BE, UTF-32 LE/BE).
		/// </summary>
		/// <param name="encoding">The encoding to test.</param>
		/// <returns>true if the code page of the encoding is a UTF code page.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
		public static bool IsUnicode(Encoding encoding)
		{
			if (encoding == null) {
				throw new ArgumentNullException("encoding");
			}
			switch (encoding.CodePage) {
				case 65000: // UTF-7
				case 65001: // UTF-8
				case 1200:  // UTF-16 LE
				case 1201:  // UTF-16 BE
				case 12000: // UTF-32 LE
				case 12001: // UTF-32 BE
					return true;
				default:
					return false;
			}
		}
		
		/// <summary>
		/// Reads the whole content of <paramref name="fs"/> as text.
		/// </summary>
		/// <param name="fs">The stream to read. It is closed when this method returns,
		/// because the reader created for it is disposed.</param>
		/// <param name="encoding">On entry, the default encoding to use when the content is
		/// not detected as Unicode. On exit, the encoding that was actually used.</param>
		/// <returns>The decoded text.</returns>
		public static string ReadFileContent(Stream fs, ref Encoding encoding)
		{
			using (StreamReader reader = OpenStream(fs, encoding)) {
				// Peek forces the reader to fill its buffer and run its BOM detection,
				// so CurrentEncoding is accurate before we report it to the caller.
				reader.Peek();
				encoding = reader.CurrentEncoding;
				return reader.ReadToEnd();
			}
		}
		
		/// <summary>
		/// Reads the whole content of the file <paramref name="fileName"/> as text.
		/// </summary>
		/// <param name="fileName">Path of the file to read.</param>
		/// <param name="encoding">The default encoding to use when the content is not
		/// detected as Unicode.</param>
		/// <returns>The decoded text.</returns>
		public static string ReadFileContent(string fileName, Encoding encoding)
		{
			// FileShare.ReadWrite: the file is opened without excluding other readers or writers.
			using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
				return ReadFileContent(fs, ref encoding);
			}
		}
		
		/// <summary>
		/// Creates a <see cref="StreamReader"/> for <paramref name="fs"/>, choosing the
		/// encoding from the stream content where possible.
		/// </summary>
		/// <param name="fs">The stream to read. It must support <see cref="Stream.Length"/>;
		/// streams of two or more bytes are rewound to position 0.</param>
		/// <param name="defaultEncoding">The encoding to use when the content is not detected
		/// as Unicode. May be null, in which case the system default encoding is used for
		/// non-Unicode content.</param>
		/// <returns>A reader over <paramref name="fs"/>.</returns>
		/// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
		public static StreamReader OpenStream(Stream fs, Encoding defaultEncoding)
		{
			if (fs == null) {
				throw new ArgumentNullException("fs");
			}
			
			if (fs.Length >= 2) {
				// The reader is always created at position 0 below, so sample the
				// first two bytes from position 0 as well.
				fs.Position = 0;
				// the autodetection of StreamReader is not capable of detecting the difference
				// between ISO-8859-1 and UTF-8 without BOM.
				int firstByte = fs.ReadByte();
				int secondByte = fs.ReadByte();
				if (firstByte >= 0 && secondByte >= 0) {
					switch ((firstByte << 8) | secondByte) {
						case 0x0000: // either UTF-32 Big Endian or a binary file; use StreamReader
						case 0xfffe: // Unicode BOM (UTF-16 LE or UTF-32 LE)
						case 0xfeff: // UTF-16 BE BOM
						case 0xefbb: // start of UTF-8 BOM
							// StreamReader autodetection works
							fs.Position = 0;
							return new StreamReader(fs);
						default:
							return AutoDetect(fs, (byte)firstByte, (byte)secondByte, defaultEncoding);
					}
				}
				// The stream ended earlier than its reported length; treat it like a short stream.
				fs.Position = 0;
			}
			
			if (defaultEncoding != null) {
				return new StreamReader(fs, defaultEncoding);
			} else {
				return new StreamReader(fs);
			}
		}
		
		/// <summary>
		/// Scans a stream without BOM to decide between UTF-8 and the default encoding.
		/// </summary>
		/// <param name="fs">The stream, positioned directly after its first two bytes.
		/// It is rewound to position 0 before the reader is created.</param>
		/// <param name="firstByte">The first byte of the stream (already consumed).</param>
		/// <param name="secondByte">The second byte of the stream (already consumed).</param>
		/// <param name="defaultEncoding">Encoding for content that is pure ASCII or not valid
		/// UTF-8; null means the system default encoding.</param>
		/// <returns>A reader positioned at the start of the stream.</returns>
		static StreamReader AutoDetect(Stream fs, byte firstByte, byte secondByte, Encoding defaultEncoding)
		{
			long length = fs.Length;
			int max = (int)Math.Min(length, MaxBytesToScan);
			// When the scan covers the whole stream, a multi-byte sequence that is still
			// open at the end cannot be completed and therefore is not valid UTF-8.
			bool scannedToEnd = (max == length);
			
			const int ASCII = 0;        // only bytes < 0x80 seen so far
			const int Error = 1;        // byte sequence is not valid UTF-8
			const int UTF8  = 2;        // at least one complete multi-byte sequence seen
			const int UTF8Sequence = 3; // inside a multi-byte sequence
			int state = ASCII;
			int sequenceLength = 0;     // continuation bytes still expected
			
			for (int i = 0; i < max; i++) {
				byte b;
				if (i == 0) {
					b = firstByte;
				} else if (i == 1) {
					b = secondByte;
				} else {
					int next = fs.ReadByte();
					if (next < 0) {
						// Stream ended before its reported length; stop instead of
						// misreading the end-of-stream marker as byte 0xFF.
						scannedToEnd = true;
						break;
					}
					b = (byte)next;
				}
				
				if (b < 0x80) {
					// normal ASCII character
					if (state == UTF8Sequence) {
						state = Error;
						break;
					}
				} else if (b < 0xc0) {
					// 10xxxxxx : continues UTF8 byte sequence
					if (state == UTF8Sequence) {
						--sequenceLength;
						if (sequenceLength < 0) {
							state = Error;
							break;
						} else if (sequenceLength == 0) {
							state = UTF8;
						}
					} else {
						state = Error;
						break;
					}
				} else if (b >= 0xc2 && b < 0xf5) {
					// beginning of byte sequence
					if (state == UTF8 || state == ASCII) {
						state = UTF8Sequence;
						if (b < 0xe0) {
							sequenceLength = 1; // one more byte following
						} else if (b < 0xf0) {
							sequenceLength = 2; // two more bytes following
						} else {
							sequenceLength = 3; // three more bytes following
						}
					} else {
						state = Error;
						break;
					}
				} else {
					// 0xc0, 0xc1, 0xf5 to 0xff are invalid in UTF-8 (see RFC 3629)
					state = Error;
					break;
				}
			}
			
			if (state == UTF8Sequence && scannedToEnd) {
				// Truncated sequence at the end of the data (e.g. an ISO-8859-1 file
				// ending in an accented character): this is not UTF-8.
				state = Error;
			}
			
			fs.Position = 0;
			switch (state) {
				case ASCII:
				case Error:
					// when the file seems to be ASCII or non-UTF8,
					// we read it using the user-specified encoding so it is saved again
					// using that encoding.
					if (defaultEncoding == null || IsUnicode(defaultEncoding)) {
						// the file is not Unicode, so don't read it using Unicode even if the
						// user has chosen Unicode as the default encoding.
						
						// If we don't do this, SD will end up always adding a Byte Order Mark
						// to ASCII files.
						defaultEncoding = Encoding.Default; // use system encoding instead
					}
					return new StreamReader(fs, defaultEncoding);
				default:
					// Valid UTF-8 (or a sequence cut off by the scan limit): StreamReader
					// without an explicit encoding decodes as UTF-8.
					return new StreamReader(fs);
			}
		}
	}
152
}