Let’s take handling any encoding of files one step further.
We need to look for specific text in files in a directory, regardless of their encoding. Here is one way to do it in Python.
#! /usr/bin/python
import sys
import os.path
import os
import re
import fnmatch


def DecodeBytes(byteArray, codecs=('utf-8', 'utf-16')):
    """Decode raw bytes using the first codec that accepts them.

    Args:
        byteArray: The raw bytes to decode.
        codecs: Codec names to try, in order. Any iterable of names works;
            the default is an immutable tuple so it cannot be mutated
            between calls.

    Returns:
        The decoded text, or None when no codec could decode the bytes.
    """
    for codec in codecs:
        try:
            return byteArray.decode(codec)
        except (UnicodeError, LookupError):
            # UnicodeError: the bytes are not valid in this codec.
            # LookupError: the codec name is unknown.
            # Either way, fall through and try the next candidate.
            continue
    return None


def ReadLinesFromFile(filename):
    """Read a file of unknown encoding and return its lines.

    The file is read as bytes and decoded with DecodeBytes. All three
    newline conventions (CRLF, CR, LF) are recognised regardless of the
    platform the script runs on, so Windows files split correctly on POSIX
    and vice versa.

    Args:
        filename: Path of the file to read.

    Returns:
        A list of lines without line terminators (a file ending in a
        newline yields a trailing empty string), or None when the content
        could not be decoded.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, "rb") as handle:
        rawbytes = handle.read()

    content = DecodeBytes(rawbytes)
    if content is None:
        return None

    # Normalise CRLF first so it is not counted as two separate breaks.
    normalised = content.replace("\r\n", "\n").replace("\r", "\n")
    return normalised.split("\n")


# this came from http://stackoverflow.com/questions/1863236/grep-r-in-python
# with a substitution of ReadLinesFromFile and a file name match filter
def RecursiveGrep(pattern, dir, match):
    """Search files under a directory tree for a regular expression.

    Args:
        pattern: Regular expression searched for in every line.
        dir: Root directory; all of its subdirectories are walked.
        match: fnmatch-style file name filter, e.g. "*.cs".

    Yields:
        One string per matching line, formatted as
        "<file path>|<zero-based line index>|<line with surrounding
        whitespace stripped>". Files that cannot be decoded are skipped.
    """
    regex = re.compile(pattern)
    for parent, dnames, fnames in os.walk(dir):
        for fname in fnmatch.filter(fnames, match):
            filename = os.path.join(parent, fname)
            if not os.path.isfile(filename):
                continue
            lines = ReadLinesFromFile(filename)
            if lines is None:
                # Undecodable with the known codecs; nothing to search.
                continue
            for idx, line in enumerate(lines):
                if regex.search(line):
                    yield filename + "|" + str(idx) + "|" + line.strip()


# Raw string: "\y" is not a valid escape sequence in a normal literal.
# RecursiveGrep is a generator, so iterate over `lines` to run the search.
lines = RecursiveGrep("needle", r"\yourpath", "*.cs")
This will recurse through all subdirectories, looking in all .cs files for "needle", and return the data in this format (pipe separated):
full file path|line number|line content
Very useful on Windows with multilingual files.