Prevent opening binary/other garbage files

helxi brucewayneshit at gmail.com
Mon Oct 1 15:21:24 UTC 2018


On Sunday, 30 September 2018 at 03:19:11 UTC, Adam D. Ruppe wrote:
> On Saturday, 29 September 2018 at 23:46:26 UTC, helxi wrote:
>> Thanks. Would you say 
>> https://dlang.org/library/std/encoding/get_bom.html is useful 
>> in this context?
>
> Eh, not really, most text files will not have one.

Hi,

I tried out https://dlang.org/library/std/utf/validate.html 
before manually checking for encoding myself so I ended up with 
the code below. I was fairly surprised that "*.o" (object) files 
are UTF encoded! Is it normal?

import std.stdio : File, lines, stdout;

void panic(in string message, int exitCode = 1) {
	import core.stdc.stdlib : exit;
	import std.stdio : stderr, writeln;

	stderr.writeln(message);
	exit(exitCode);
}

void writeFunc(ulong occerenceNumber, ulong lineNumber, in ref 
string fileName,
		in ref string line, File ofile = stdout) {
	import std.stdio : writef;

	ofile.writef("%s: L:%s: F:\"%s\":\n%s\n", occerenceNumber, 
lineNumber, fileName, line);
}

void treverseDirectories(in string path, in string term)
in {
	import std.file : isDir;

	if (!isDir(path))
		panic("Cannot access directory: " ~ path);
}
do {
	import std.file : dirEntries, SpanMode;

	ulong occerenceNumber, filesChecked, filesIgnored; // = 0;
	File currentFile;
	foreach (string fileName; dirEntries(path, SpanMode.breadth)) {
		try {
			currentFile = File(fileName, "r");
			++filesChecked;
			foreach (ulong lineNumber, string currentLine; 
lines(currentFile)) {
				if (lineNumber == 0) {
					// check if the file is encoded with proper UTF
					// if Line 0 is not UTF encoded, move on to the next file

					// I hope the compiler unrolls this if condition
					import std.utf : validate;

					validate(currentLine);
                                         // throws exception if 
the file is not UTF encoded
				}
				import std.algorithm : canFind;

				if (canFind(currentLine, term)) {
					writeFunc(++occerenceNumber, lineNumber, fileName, 
currentLine);
				}
			}
		}
		catch (Exception e) {
			filesIgnored++;
		}
	}
	//summarize
	import std.stdio : writefln;

	writefln("Total match found:\t%s\nTotal files 
checked:\t%s\nTotal files ignored:\t%s\n",
			occerenceNumber, filesChecked, filesIgnored);
}

void main(string[] args) {
	import std.getopt : getopt;

	string term, directory;
	getopt(args, "term|t", &term, "directory|d", &directory);

	if (!directory) {
		// if directory not specified, start working with the current 
directory
		import std.file : getcwd;

		directory = getcwd();
	}

	if (!term)
		panic("Term not specified.");

	treverseDirectories(directory, term);
}


/*

Output:  https://pastebin.com/PZ8nCaYf


More information about the Digitalmars-d-learn mailing list