pysal.lib.io.fileio 源代码

"""
FileIO: Module for reading and writing various file types in a Pythonic way.
This module should not be used directly, instead...
import pysal.core.FileIO as FileIO
Readers and Writers will mimic python file objects.
.seek(n) seeks to the n'th object
.read(n) reads n objects, default == all
.next() reads the next object
"""

__author__ = "Charles R Schmidt <schmidtc@gmail.com>"

__all__ = ['FileIO']
import os.path
from warnings import warn
from ..common import MISSINGVALUE


class FileIO_MetaCls(type):
    """
    This Meta Class is instantiated when the class is first defined.
    All subclasses of FileIO also inherit this meta class, which registers their abilities with the FileIO registry.
    Subclasses must contain FORMATS and MODES (both are type(list))
    """
    def __new__(mcs, name, bases, dict):
        cls = type.__new__(mcs, name, bases, dict)
        if name != 'FileIO' and name != 'DataTable':
            if "FORMATS" in dict and "MODES" in dict:
                #print "Registering %s with FileIO.\n\tFormats: %r\n\tModes: %r"%(name,dict['FORMATS'],dict['MODES'])
                FileIO._register(cls, dict['FORMATS'], dict['MODES'])
            else:
                raise TypeError("FileIO subclasses must have FORMATS and MODES defined")
        return cls


[文档]class FileIO(object, metaclass=FileIO_MetaCls): # should be a type? """ Metaclass for supporting spatial data file read and write How this works: FileIO.open(\*args) == FileIO(\*args) When creating a new instance of FileIO the .__new__ method intercepts .__new__ parses the filename to determine the fileType next, .__registry and checked for that type. Each type supports one or more modes ['r','w','a',etc] If we support the type and mode, an instance of the appropriate handler is created and returned. All handlers must inherit from this class, and by doing so are automatically added to the .__registry and are forced to conform to the prescribed API. The metaclass takes cares of the registration by parsing the class definition. It doesn't make much sense to treat weights in the same way as shapefiles and dbfs, ....for now we'll just return an instance of W on mode='r' .... on mode='w', .write will expect an instance of W """ __registry = {} # {'shp':{'r':[OGRshpReader,pysalShpReader]}} def __new__(cls, dataPath='', mode='r', dataFormat=None): """ Intercepts the instantiation of FileIO and dispatches to the correct handler If no suitable handler is found a python file object is returned. """ if cls is FileIO: try: newCls = object.__new__(cls.__registry[cls.getType(dataPath, mode, dataFormat)][mode][0]) except KeyError: return open(dataPath, mode) return newCls else: return object.__new__(cls) @staticmethod def getType(dataPath, mode, dataFormat=None): """Parse the dataPath and return the data type""" if dataFormat: ext = dataFormat else: ext = os.path.splitext(dataPath)[1] ext = ext.replace('.', '') ext = ext.lower() if ext == 'txt': f = open(dataPath, 'r') l1 = f.readline() l2 = f.readline() if ext == 'txt': try: n, k = l1.split(',') n, k = int(n), int(k) fields = l2.split(',') assert len(fields) == k return 'geoda_txt' except: return ext return ext @classmethod def _register(cls, parser, formats, modes): """ This method is called automatically via the MetaClass of FileIO subclasses This should be private, but that hides it from the MetaClass """ assert cls is FileIO for format in formats: if not format in cls.__registry: cls.__registry[format] = {} for mode in modes: if not mode in cls.__registry[format]: cls.__registry[format][mode] = [] cls.__registry[format][mode].append(parser) #cls.check() @classmethod def check(cls): """ Prints the contents of the registry """ print("PySAL File I/O understands the following file extensions:") for key, val in list(cls.__registry.items()): print("Ext: '.%s', Modes: %r" % (key, list(val.keys()))) @classmethod def open(cls, *args, **kwargs): """ Alias for FileIO() """ return cls(*args, **kwargs) class _By_Row: def __init__(self, parent): self.p = parent def __repr__(self): if not self.p.ids: return "keys: range(0,n)" else: return "keys: " + list(self.p.ids.keys()).__repr__() def __getitem__(self, key): if type(key) == list: r = [] if self.p.ids: for k in key: r.append(self.p.get(self.p.ids[k])) else: for k in key: r.append(self.p.get(k)) return r if self.p.ids: return self.p.get(self.p.ids[key]) else: return self.p.get(key) __call__ = __getitem__
[文档] def __init__(self, dataPath='', mode='r', dataFormat=None): self.dataPath = dataPath self.dataObj = '' self.mode = mode #pos Should ALWAYS be in the range 0,...,n #for custom IDs set the ids property. self.pos = 0 self.__ids = None # {'id':n} self.__rIds = None self.closed = False self._spec = [] self.header = []
def __getitem__(self, key): return self.by_row.__getitem__(key) @property def by_row(self): return self._By_Row(self) def __getIds(self): return self.__ids def __setIds(self, ids): """ Property Method for .ids Takes a list of ids and maps then to a 0 based index Need to provide a method to set ID's based on a fieldName preferably without reading the whole file. """ if isinstance(ids, list): try: assert len(ids) == len(set(ids)) except AssertionError: raise KeyError("IDs must be unique") # keys: ID values: i self.__ids = {} # keys: i values: ID self.__rIds = {} for i, id in enumerate(ids): self.__ids[id] = i self.__rIds[i] = id elif isinstance(ids, dict): self.__ids = ids self.__rIds = {} for id, n in list(ids.items()): self.__rIds[n] = id elif not ids: self.__ids = None self.__rIds = None ids = property(fget=__getIds, fset=__setIds) @property def rIds(self): return self.__rIds def __iter__(self): self.seek(0) return self @staticmethod def _complain_ifclosed(closed): """ from StringIO """ if closed: raise ValueError("I/O operation on closed file") def cast(self, key, typ): """cast key as typ""" if key in self.header: if not self._spec: self._spec = [lambda x:x for k in self.header] if typ is None: self._spec[self.header.index(key)] = lambda x: x else: try: assert hasattr(typ, '__call__') self._spec[self.header.index(key)] = typ except AssertionError: raise TypeError('Cast Objects must be callable') else: raise KeyError("%s" % key) def _cast(self, row): if self._spec and row: try: return [f(v) for f, v in zip(self._spec, row)] except ValueError: r = [] for f, v in zip(self._spec, row): try: if not v and f != str: raise ValueError r.append(f(v)) except ValueError: warn("Value '%r' could not be cast to %s, value set to MISSINGVALUE" % (v, str(f)), RuntimeWarning) r.append(MISSINGVALUE) return r else: return row def __next__(self): """A FileIO object is its own iterator, see StringIO""" self._complain_ifclosed(self.closed) r = self.__read() if r is None: raise StopIteration return r def close(self): """ subclasses should clean themselves up and then call this method """ if not self.closed: self.closed = True del self.dataObj, self.pos def get(self, n): """ Seeks the file to n and returns n If .ids is set n should be an id, else, n should be an offset """ prevPos = self.tell() self.seek(n) obj = self.__read() self.seek(prevPos) return obj def seek(self, n): """ Seek the FileObj to the beginning of the n'th record, if ids are set, seeks to the beginning of the record at id, n""" self._complain_ifclosed(self.closed) self.pos = n def tell(self): """ Return id (or offset) of next object """ self._complain_ifclosed(self.closed) return self.pos def read(self, n=-1): """ Read at most n objects, less if read hits EOF if size is negative or omitted read all objects until EOF returns None if EOF is reached before any objects. """ self._complain_ifclosed(self.closed) if n < 0: #return list(self) result = [] while 1: try: result.append(self.__read()) except StopIteration: break return result elif n == 0: return None else: result = [] for i in range(0, n): try: result.append(self.__read()) except StopIteration: break return result def __read(self): """ Gets one row from the file handler, and if necessary casts it's objects """ row = self._read() if row is None: raise StopIteration row = self._cast(row) return row def _read(self): """ Must be implemented by subclasses that support 'r' subclasses should increment .pos and redefine this doc string """ self._complain_ifclosed(self.closed) raise NotImplementedError def truncate(self, size=None): """ Should be implemented by subclasses and redefine this doc string """ self._complain_ifclosed(self.closed) raise NotImplementedError def write(self, obj): """ Must be implemented by subclasses that support 'w' subclasses should increment .pos subclasses should also check if obj is an instance of type(list) and redefine this doc string """ self._complain_ifclosed(self.closed) "Write obj to dataObj" raise NotImplementedError def flush(self): self._complain_ifclosed(self.closed) raise NotImplementedError