astropy.io.ascii.fixedwidth 源代码
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""An extensible ASCII table reader and writer.
fixedwidth.py:
Read or write a table with fixed width columns.
:Copyright: Smithsonian Astrophysical Observatory (2011)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""
from . import core
from .core import InconsistentTableError, DefaultSplitter
from . import basic
[文档]class FixedWidthSplitter(core.BaseSplitter):
"""
Split line based on fixed start and end positions for each ``col`` in
``self.cols``.
This class requires that the Header class will have defined ``col.start``
and ``col.end`` for each column. The reference to the ``header.cols`` gets
put in the splitter object by the base Reader.read() function just in time
for splitting data lines by a ``data`` object.
Note that the ``start`` and ``end`` positions are defined in the pythonic
style so line[start:end] is the desired substring for a column. This splitter
class does not have a hook for ``process_lines`` since that is generally not
useful for fixed-width input.
"""
delimiter_pad = ''
bookend = False
delimiter = '|'
[文档] def __call__(self, lines):
for line in lines:
vals = [line[x.start:x.end] for x in self.cols]
if self.process_val:
yield [self.process_val(x) for x in vals]
else:
yield vals
[文档] def join(self, vals, widths):
pad = self.delimiter_pad or ''
delimiter = self.delimiter or ''
padded_delim = pad + delimiter + pad
if self.bookend:
bookend_left = delimiter + pad
bookend_right = pad + delimiter
else:
bookend_left = ''
bookend_right = ''
vals = [' ' * (width - len(val)) + val for val, width in zip(vals, widths)]
return bookend_left + padded_delim.join(vals) + bookend_right
class FixedWidthHeaderSplitter(DefaultSplitter):
'''Splitter class that splits on ``|``.'''
delimiter = '|'
[文档]class FixedWidthHeader(basic.BasicHeader):
"""
Fixed width table header reader.
"""
splitter_class = FixedWidthHeaderSplitter
""" Splitter class for splitting data lines into columns """
position_line = None # secondary header line position
""" row index of line that specifies position (default = 1) """
set_of_position_line_characters = set(r'`~!#$%^&*-_+=\|":' + "'")
[文档] def get_line(self, lines, index):
for i, line in enumerate(self.process_lines(lines)):
if i == index:
break
else: # No header line matching
raise InconsistentTableError('No header line found in table')
return line
[文档] def get_cols(self, lines):
"""
Initialize the header Column objects from the table ``lines``.
Based on the previously set Header attributes find or create the column names.
Sets ``self.cols`` with the list of Columns.
Parameters
----------
lines : list
List of table lines
"""
# See "else" clause below for explanation of start_line and position_line
start_line = core._get_line_index(self.start_line, self.process_lines(lines))
position_line = core._get_line_index(self.position_line, self.process_lines(lines))
# If start_line is none then there is no header line. Column positions are
# determined from first data line and column names are either supplied by user
# or auto-generated.
if start_line is None:
if position_line is not None:
raise ValueError("Cannot set position_line without also setting header_start")
# data.data_lines attribute already set via self.data.get_data_lines(lines)
# in BaseReader.read(). This includes slicing for data_start / data_end.
data_lines = self.data.data_lines
if not data_lines:
raise InconsistentTableError(
'No data lines found so cannot autogenerate column names')
vals, starts, ends = self.get_fixedwidth_params(data_lines[0])
self.names = [self.auto_format.format(i)
for i in range(1, len(vals) + 1)]
else:
# This bit of code handles two cases:
# start_line = <index> and position_line = None
# Single header line where that line is used to determine both the
# column positions and names.
# start_line = <index> and position_line = <index2>
# Two header lines where the first line defines the column names and
# the second line defines the column positions
if position_line is not None:
# Define self.col_starts and self.col_ends so that the call to
# get_fixedwidth_params below will use those to find the header
# column names. Note that get_fixedwidth_params returns Python
# slice col_ends but expects inclusive col_ends on input (for
# more intuitive user interface).
line = self.get_line(lines, position_line)
if len(set(line) - set([self.splitter.delimiter, ' '])) != 1:
raise InconsistentTableError(
'Position line should only contain delimiters and '
'one other character, e.g. "--- ------- ---".')
# The line above lies. It accepts white space as well.
# We don't want to encourage using three different
# characters, because that can cause ambiguities, but white
# spaces are so common everywhere that practicality beats
# purity here.
charset = self.set_of_position_line_characters.union(
set([self.splitter.delimiter, ' ']))
if not set(line).issubset(charset):
raise InconsistentTableError(
f'Characters in position line must be part of {charset}')
vals, self.col_starts, col_ends = self.get_fixedwidth_params(line)
self.col_ends = [x - 1 if x is not None else None for x in col_ends]
# Get the header column names and column positions
line = self.get_line(lines, start_line)
vals, starts, ends = self.get_fixedwidth_params(line)
self.names = vals
self._set_cols_from_names()
# Set column start and end positions.
for i, col in enumerate(self.cols):
col.start = starts[i]
col.end = ends[i]
[文档] def get_fixedwidth_params(self, line):
"""
Split ``line`` on the delimiter and determine column values and
column start and end positions. This might include null columns with
zero length (e.g. for ``header row = "| col1 || col2 | col3 |"`` or
``header2_row = "----- ------- -----"``). The null columns are
stripped out. Returns the values between delimiters and the
corresponding start and end positions.
Parameters
----------
line : str
Input line
Returns
-------
vals : list
List of values.
starts : list
List of starting indices.
ends : list
List of ending indices.
"""
# If column positions are already specified then just use those.
# If neither column starts or ends are given, figure out positions
# between delimiters. Otherwise, either the starts or the ends have
# been given, so figure out whichever wasn't given.
if self.col_starts is not None and self.col_ends is not None:
starts = list(self.col_starts) # could be any iterable, e.g. np.array
# user supplies inclusive endpoint
ends = [x + 1 if x is not None else None for x in self.col_ends]
if len(starts) != len(ends):
raise ValueError('Fixed width col_starts and col_ends must have the same length')
vals = [line[start:end].strip() for start, end in zip(starts, ends)]
elif self.col_starts is None and self.col_ends is None:
# There might be a cleaner way to do this but it works...
vals = line.split(self.splitter.delimiter)
starts = [0]
ends = []
for val in vals:
if val:
ends.append(starts[-1] + len(val))
starts.append(ends[-1] + 1)
else:
starts[-1] += 1
starts = starts[:-1]
vals = [x.strip() for x in vals if x]
if len(vals) != len(starts) or len(vals) != len(ends):
raise InconsistentTableError('Error parsing fixed width header')
else:
# exactly one of col_starts or col_ends is given...
if self.col_starts is not None:
starts = list(self.col_starts)
ends = starts[1:] + [None] # Assume each col ends where the next starts
else: # self.col_ends is not None
ends = [x + 1 for x in self.col_ends]
starts = [0] + ends[:-1] # Assume each col starts where the last ended
vals = [line[start:end].strip() for start, end in zip(starts, ends)]
return vals, starts, ends
[文档] def write(self, lines):
# Header line not written until data are formatted. Until then it is
# not known how wide each column will be for fixed width.
pass
[文档]class FixedWidthData(basic.BasicData):
"""
Base table data reader.
"""
splitter_class = FixedWidthSplitter
""" Splitter class for splitting data lines into columns """
[文档] def write(self, lines):
vals_list = []
col_str_iters = self.str_vals()
for vals in zip(*col_str_iters):
vals_list.append(vals)
for i, col in enumerate(self.cols):
col.width = max([len(vals[i]) for vals in vals_list])
if self.header.start_line is not None:
col.width = max(col.width, len(col.info.name))
widths = [col.width for col in self.cols]
if self.header.start_line is not None:
lines.append(self.splitter.join([col.info.name for col in self.cols],
widths))
if self.header.position_line is not None:
char = self.header.position_char
if len(char) != 1:
raise ValueError(f'Position_char="{char}" must be a single character')
vals = [char * col.width for col in self.cols]
lines.append(self.splitter.join(vals, widths))
for vals in vals_list:
lines.append(self.splitter.join(vals, widths))
return lines
[文档]class FixedWidth(basic.Basic):
"""Fixed width table with single header line defining column names and positions.
Examples::
# Bar delimiter in header and data
| Col1 | Col2 | Col3 |
| 1.2 | hello there | 3 |
| 2.4 | many words | 7 |
# Bar delimiter in header only
Col1 | Col2 | Col3
1.2 hello there 3
2.4 many words 7
# No delimiter with column positions specified as input
Col1 Col2Col3
1.2hello there 3
2.4many words 7
See the :ref:`astropy:fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width'
_description = 'Fixed width'
header_class = FixedWidthHeader
data_class = FixedWidthData
def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
super().__init__()
self.data.splitter.delimiter_pad = delimiter_pad
self.data.splitter.bookend = bookend
self.header.col_starts = col_starts
self.header.col_ends = col_ends
class FixedWidthNoHeaderHeader(FixedWidthHeader):
'''Header reader for fixed with tables with no header line'''
start_line = None
class FixedWidthNoHeaderData(FixedWidthData):
'''Data reader for fixed width tables with no header line'''
start_line = 0
[文档]class FixedWidthNoHeader(FixedWidth):
"""Fixed width table which has no header line.
When reading, column names are either input (``names`` keyword) or
auto-generated. Column positions are determined either by input
(``col_starts`` and ``col_stops`` keywords) or by splitting the first data
line. In the latter case a ``delimiter`` is required to split the data
line.
Examples::
# Bar delimiter in header and data
| 1.2 | hello there | 3 |
| 2.4 | many words | 7 |
# Compact table having no delimiter and column positions specified as input
1.2hello there3
2.4many words 7
This class is just a convenience wrapper around the ``FixedWidth`` reader
but with ``header_start=None`` and ``data_start=0``.
See the :ref:`astropy:fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width_no_header'
_description = 'Fixed width with no header'
header_class = FixedWidthNoHeaderHeader
data_class = FixedWidthNoHeaderData
def __init__(self, col_starts=None, col_ends=None, delimiter_pad=' ', bookend=True):
super().__init__(col_starts, col_ends, delimiter_pad=delimiter_pad,
bookend=bookend)
class FixedWidthTwoLineHeader(FixedWidthHeader):
'''Header reader for fixed width tables splitting on whitespace.
For fixed width tables with several header lines, there is typically
a white-space delimited format line, so splitting on white space is
needed.
'''
splitter_class = DefaultSplitter
class FixedWidthTwoLineDataSplitter(FixedWidthSplitter):
'''Splitter for fixed width tables splitting on ``' '``.'''
delimiter = ' '
class FixedWidthTwoLineData(FixedWidthData):
'''Data reader for fixed with tables with two header lines.'''
splitter_class = FixedWidthTwoLineDataSplitter
[文档]class FixedWidthTwoLine(FixedWidth):
"""Fixed width table which has two header lines.
The first header line defines the column names and the second implicitly
defines the column positions.
Examples::
# Typical case with column extent defined by ---- under column names.
col1 col2 <== header_start = 0
----- ------------ <== position_line = 1, position_char = "-"
1 bee flies <== data_start = 2
2 fish swims
# Pretty-printed table
+------+------------+
| Col1 | Col2 |
+------+------------+
| 1.2 | "hello" |
| 2.4 | there world|
+------+------------+
See the :ref:`astropy:fixed_width_gallery` for specific usage examples.
"""
_format_name = 'fixed_width_two_line'
_description = 'Fixed width with second header line'
data_class = FixedWidthTwoLineData
header_class = FixedWidthTwoLineHeader
def __init__(self, position_line=1, position_char='-', delimiter_pad=None, bookend=False):
super().__init__(delimiter_pad=delimiter_pad, bookend=bookend)
self.header.position_line = position_line
self.header.position_char = position_char
self.data.start_line = position_line + 1