Source code for cctk.lines

import re
from itertools import islice


[docs]
class LazyLineObject:
    """
    Instead of storing ``lines`` as an array, this object can be used.
    It reduces the memory usage drastically! It looks up lines only when needed.

    Only the file path and a line range are stored; every access re-opens the file and
    reads it up to the requested line(s).

    Attributes:
        file (str): path of the file the lines are read from
        start (int): zero-indexed number of the first line of the range
        end (int): zero-indexed number of the last line of the range (inclusive)
    """

    def __init__(self, file, start, end):
        """
        Args:
            file (str): path of the file to read lines from
            start (int): zero-indexed number of the first line
            end (int): zero-indexed number of the last line (inclusive)
        """
        self.file = file
        self.start = start
        self.end = end

    def __len__(self):
        # ``end`` is inclusive everywhere else (``__iter__``, ``full_text``), so count it here too;
        # an inverted range iterates as empty, hence the clamp to zero.
        return max(self.end - self.start + 1, 0)

    def __str__(self):
        return repr(self)

    def __repr__(self):
        return f"LazyLineObject for file {self.file}, lines {self.start}-{self.end}"

    def __iter__(self):
        """
        Yield every line from ``start`` to ``end`` (inclusive) with the trailing newline removed.
        """
        with open(self.file, "r") as lines:
            for line in islice(lines, self.start, self.end + 1):
                yield line.rstrip("\n")

    def __getitem__(self, key):
        """
        Return a single line of the range, with trailing whitespace removed.

        Args:
            key (int): index relative to ``start``; negative values count back from the last line

        Returns:
            the requested line (str), or ``None`` if the file has fewer lines than the range claims

        Raises:
            KeyError: if ``key`` lies outside the range
        """
        length = len(self)
        if key >= length:
            raise KeyError("key too big")
        if key < 0:
            key += length
            if key < 0:
                raise KeyError("key too small")

        position = self.start + key
        with open(self.file, "r") as lines:
            line = next(islice(lines, position, position + 1), None)

        if line is None:
            return None
        return line.rstrip()

    def full_text(self):
        """
        Return all lines from ``start`` to ``end`` (inclusive) as one string.

        Trailing whitespace is removed from every line and each line is terminated by ``"\\n"``.
        """
        with open(self.file, "r") as lines:
            return "".join(line.rstrip() + "\n" for line in islice(lines, self.start, self.end + 1))

    def search_for_block(self, start, end, count=1, join=" ", max_len=1000, format_line=None):
        """
        Search through a file (lines) and locate a block starting with "start" (inclusive) and ending with "end" (exclusive).

        A block is closed either by a line matching ``end`` or by the first line seen after the block has
        reached ``max_len`` lines; in both cases that closing line is not part of the block.
        A block that is still open when the lines run out is not recorded.

        Args:
            start (str): a pattern that matches the start of the block (can contain special characters)
            end (str): a pattern that matches the end of the block (can contain special characters) - ``None`` removes this (so blocks are only closed by ``max_len``)
            count (int): how many matches to search for
            join (str): spacer between lines
            max_len (int): maximum number of lines per match (to prevent overflow)
            format_line (function): function to perform to each line (after leading whitespace is removed) before adding to match

        Returns:
            a single match (str) if count == 1 or a list of matches (str) if count > 1.
            Matches that were not found are ``None``; ``None`` is also returned if count == 0.
        """
        assert isinstance(count, int), "count needs to be an integer"
        assert isinstance(max_len, int), "max_len needs to be an integer"
        assert isinstance(join, str), "join needs to be a string"

        if count == 0:
            return None

        #### we want a regex that will never match anything - and quickly - so trying to match something before the start of the line works
        if end is None:
            end = "a^"

        start_pattern = re.compile(start)
        end_pattern = re.compile(end)

        def prepare(line):
            stripped = line.lstrip()
            return stripped if format_line is None else format_line(stripped)

        match = [None] * count
        # ``None`` means "not inside a block"; an empty string is a valid (if unusual) block start
        current_match = None
        current_len = 0
        index = 0

        for line in self:
            if current_match is None:
                if start_pattern.search(line):
                    current_match = prepare(line)
                    current_len = 1
            elif end_pattern.search(line) or current_len >= max_len:
                match[index] = current_match
                current_match = None
                current_len = 0
                index += 1

                if index == count:
                    break
            else:
                current_match = current_match + join + prepare(line)
                current_len += 1

        if count == 1:
            return match[0]
        return match

    def find_parameter(self, parameter, expected_length, which_field, split_on=None, cast_to_float=True):
        """
        Extract fields from every line that matches ``parameter``.

        Matching lines are split on runs of spaces (and additionally on ``split_on``, if given), empty
        fields are discarded, and lines that do not have exactly ``expected_length`` fields are skipped.

        Args:
            parameter (string): text to search for (compiled as a regex)
            expected_length (int): how many fields there should be
            which_field (int or list): which field(s) the parameter is (zero-indexed)
            split_on (str): additional non-space field on which to split
            cast_to_float (Bool): whether or not to cast extracted value to float (values that cannot be cast become ``0``)

        Returns:
            a list of all the extracted values (one value per line if a single field was requested,
            otherwise one list per line)

        Raises:
            TypeError: if ``expected_length`` or an entry of ``which_field`` is not an int
            ValueError: if a requested field lies outside ``expected_length`` fields, or ``parameter`` is not a valid regex
        """
        if not isinstance(which_field, list):
            which_field = [which_field]

        if not isinstance(expected_length, int):
            raise TypeError("expected_length must be type int!")

        for n in which_field:
            if not isinstance(n, int):
                raise TypeError("which_field must be type int!")
            if n >= expected_length:
                raise ValueError("can't expect a field after the last field!")
            if n < -expected_length:
                raise ValueError("can't expect a field before the first field!")

        try:
            pattern = re.compile(parameter)
        except Exception as err:
            # any compilation failure (bad syntax, wrong type, ...) is reported uniformly as ValueError
            raise ValueError(f"pattern {parameter} cannot be compiled as a regex; try again!") from err

        matches = []
        for line in self:
            if not pattern.search(line):
                continue

            fields = re.split(" +", line)
            if split_on:
                fields = [piece for field in fields for piece in field.split(split_on)]
            fields = [field for field in fields if field]

            if len(fields) != expected_length:
                continue

            desired_fields = []
            for n in which_field:
                if cast_to_float:
                    try:
                        desired_fields.append(float(fields[n]))
                    except ValueError:
                        # best effort: unparseable numbers are recorded as 0 rather than aborting the scan
                        desired_fields.append(0)
                else:
                    desired_fields.append(fields[n])

            if len(desired_fields) == 1:
                matches.append(desired_fields[0])
            else:
                matches.append(desired_fields)

        return matches