Source code for challenges.challenge

"""Core module of challenges

This module holds the base class of all challenges.
"""

import re
import math
import types
from collections import defaultdict


[docs]class Challenge: """Base class of all challenges Design concept is the Template Method Design Pattern (GOF). Attributes: :sample: The input of the challenge. :output: The output of the challenge Workflow: The `main` method controls the overall workflow by calling the worker methods. This is the common character of all challenges. The base class controls the workflow of the derived workers. Workers: The worker methods need to be implemented by the inheriting class. :read: Read the input into a list of lines. :build: Build the data model from the lines. :calc: Run the main algorithm of the challenge. :format: Create the output string required by the grader. Library: The other methods support the implementation of the workers. They address the extraction of data from the input lines or the formatting of the output. Sample: The attribute `sample` is both used as class and as instance attribute. When the instance attribute is injected it shadows the class attribute. By this the class attribute sets a tiny but useful default. When the challenge runner is executed with the option `--klass` no instance variable is injected and the sample from the class is used:: prompt> challenge MyChallenge --klass When the runner is executed with the option `--file` the files content is injected:: prompt> challenge MyChallenge --file ~/Downloads/data.txt """ sample = ''' sample sample ''' """Holds a minimal example of the input with additional whitespace. This class variable should always be preset with a tiny sample of input. Whitespace surrounding lines is for readability. It typically needs to be stripped to get the actual sample. """ expect = ''' expected result expected result ''' """Holds the expected result with additional leading whitespace. Whitespace surrounding lines is for readability. It typically needs to be stripped to get the actual expactation. """ br = '\n' """Line breaks as expected by the most graders.""" split_pattern = '\s+|\s?,\s?' """Reg expression to split input lines. Used by some of the input parsing functions. The default splits by whitespace and/or comma. If the input is separated differently like colons or semicolons it needs adjustment in the inheriting class. """ edge_pattern = '^(\d+)->(\d+)(:(\d+))?$' """Reg expression to extract edges of a graph. With or without weight. 2->3 2->3:22 A default setting used by methods that extract edges from input lines. May need adjustment for different kind of edge input formats. """ multi_edge_pattern = '^(\d+)->(\d+(,?\s*\d+)*)$' """Reg expressen to extrct edges of a graph. Multiple edges on one line. 2->3, 4, 5 A default setting used by methods that extract edges from input lines. May need adjustment for different kind of edge input formats. """ fasta_pattern = '^[\-\*A-Z]+$' """Reg expression for FASTA sequences. Matches lines holding FASTA sequences. """ def __init__(self): self.lines = [] """A list of lines that will be filled by the method read().""" self.model = types.SimpleNamespace() """The imported data model. A flexible namespace object to take up any kind of data. In simple cases this may be completely overwritten, i.e by a list or dict. """ self.result = types.SimpleNamespace() """The resulting data model. A flexible namespace object to take up any kind of data. In simple cases this may be completely overwritten, i.e by a list or dict. """ self.output = '' """The output string. The string representation of the resulting model as expected by the grader. """
[docs] def main(self): """Control the workflow of the challenge. Usually this method doesn't need to be overwritten. The workers share data via instance variables. The overall input is injected into self.sample. The overall output is read from self.result. """ self.read() self.build() self.calc() self.format()
# -------------------------------------------------- # Default and abstract workers # --------------------------------------------------
[docs] def read(self): """Extract the input string self.sample into self.lines. Typically this method can be used as is. """ self.lines = self.example().splitlines()
[docs] def build(self): """Set up the model from the input lines. This method must be implemented. Reads from self.lines. Fills self.model. """ pass
[docs] def calc(self): """Main algorithm of the challenge. This method must be implemented. Here the interesting stuff happens. Best practice is to delegate to functions, that are named by the algorithms used or even to other classes that implement the algorithm. Reads from self.model. Fills self.result. """ pass
[docs] def format(self): """Format the output string. In simple cases this method can be used as is. In other cases it needs to be reimplemented. Reads from self.result. Fills self.output. """ self.output = str(self.result)
# -------------------------------------------------- # Accessing example and expectation # --------------------------------------------------
[docs] def example(self): """Get the sample, with heading whitespace trimmed""" lines = self.sample.strip().splitlines() return '\n'.join(line.strip() for line in lines)
[docs] def expectation(self): """Get the expecation, with heading whitespace trimmed""" lines = self.expect.strip().splitlines() return '\n'.join(line.strip() for line in lines)
# -------------------------------------------------- # Accessing input lines # --------------------------------------------------
[docs] def line(self, nr: int): """ Return one line by the given number. :param nr: line number :return: line as string """ return self.lines[nr]
[docs] def lines_to_list(self, start: int = 0, stop: int = None): """Return a list of lines. If stop is not given all remaining lines are used. :param start: index of first line :param stop: index of line after last line :return: list of lines """ if stop: return self.lines[start:stop] else: return self.lines[start:]
def _to_words(self, line: str): """ Split line into words The split behaviour can be adjusted by changing self.split_pattern. :param line: the string to split :return: list of words """ return list(re.compile(self.split_pattern).split(line))
[docs] def line_to_words(self, nr: int): """ Split one line into a list of words. :param nr: line number :return: list of words :see: self._to_words() """ return self._to_words(self.line(nr))
[docs] def lines_to_words(self, start: int = 0, stop: int = None, flatten: bool = False): """Split a range of lines into words. If stop is not given all remaining lines are used. :param start: index of first line :param stop: index of line after last line :param flatten: flatten to one dimensional list :return: one or two dimensional list of words :see: self._to_words() """ words = [] for line in self.lines_to_list(start, stop): if flatten: words += self._to_words(line) else: words.append(self._to_words(line)) return words
def _to_integers(self, line:str): """ Split line into integers The split behaviour can be adjusted by changing self.split_pattern. :param line: the string to split :return: list of integers """ return [int(i) for i in re.compile(self.split_pattern).split(line)]
[docs] def line_to_integer(self, nr: int): """ Return line as integer. :param nr: line number :return: integer """ return int(self.line(nr))
[docs] def line_to_integers(self, nr: int): """ Split one line into a list of integers. :param nr: line number :return: list of integers :see: self._to_integers """ return self._to_integers(self.line(nr))
[docs] def lines_to_integers(self, start:int=0, stop=None, flatten=False): """Split a range of lines into integers If stop is not given all remaining lines are used. :param start: index of first line :param stop: index of line after last line :param flatten: flatten to one dimensional list :return: one or two dimensional list of integers :see: self._to_integers() """ integers = [] for line in self.lines_to_list(start, stop): if flatten: integers += self._to_integers(line) else: integers.append(self._to_integers(line)) return integers
def _to_floats(self, line:str): """ Split line into floats The split behaviour can be adjusted by changing self.split_pattern. :param line: the string to split :return: list of floats """ return [float(f) for f in re.compile(self.split_pattern).split(line)]
[docs] def line_to_float(self, nr: int): """ Return line as float. :param nr: line number :return: float """ return float(self.line(nr))
[docs] def line_to_floats(self, nr: int): """ Split one line into a list of floats. :param nr: line number :return: list of floats :see: self._to_floats """ return self._to_floats(self.line(nr))
[docs] def lines_to_floats(self, start:int=0, stop=None, flatten=False): """Split a range of lines into floats If stop is not given all remaining lines are used. :param start: index of first line :param stop: index of line after last line :param flatten: flatten to one dimensional list :return: one or two dimensional list of floats :see: self._to_floats() """ floats = [] for line in self.lines_to_list(start, stop): if flatten: floats += self._to_floats(line) else: floats.append(self._to_floats(line)) return floats
[docs] def line_to_permutation(self, nr: int, terminals: bool = False): """Convert one line to a permutation optionally surrounded by terminals Example: (+1 -3, -2) Result: (1, -3, 2) If terminals is True: (0, 1, -3, 2, 4) The number of the line is selected by nr. Input may be surrounded by a pair of round parenthesis. :param nr: line number :param terminals: if True surrounded by 0 and n + 1 :return: permutation """ line = self.line(nr) match = re.compile('^\((.*)\)$').match(line) if match: digits = match.group(1) else: digits = line perm = [int(d) for d in re.compile(self.split_pattern).split(digits)] if terminals: perm = [0] + perm + [len(perm) + 1] return tuple(perm)
[docs] def line_to_permutations(self, nr: int): """Convert one line to multiple permutations Example: (+1 -3, -2)(+4 +5) Result: [(1, -3, 2), (4, 5)] The number of the line is selected by line_nr. :param nr: line number :return: list of permutations (tuples) """ matches = re.findall('\(([^)]*)\)', self.line(nr)) result = [] for digits in matches: result.append(tuple(int(d) for d in re.compile( self.split_pattern).split(digits))) return result
# noinspection PyMethodMayBeStatic def _to_edges(self, line:str): """Convert input string to edges. Detects if the line in single edge or multi edge format. Single edge formats: tail->head tail->head:weight Multi edge formats: tail->head, head, head Edge is of type namespace: edge.tail edge.head edge.weight if given :param line: input string :return: list of edge """ edges = [] match = re.compile(self.edge_pattern).match(line) if match: edge = types.SimpleNamespace() edge.tail = int(match.group(1)) edge.head = int(match.group(2)) if match.group(4): edge.weight = int(match.group(4)) edges.append(edge) else: match = re.compile(self.multi_edge_pattern).match(line) if match: tail = int(match.group(1)) rest = match.group(2) heads = [int(i) for i in re.compile(self.split_pattern).split(rest)] for head in heads: edge = types.SimpleNamespace() edge.tail = tail edge.head = head edges.append(edge) return edges
[docs] def line_to_edge(self, nr: int): """Convert one line to an edge. :param nr: line number :return: edge (namespace: tail, head, weight) :see: self._to_edges """ return self._to_edges(self.line(nr))[0]
[docs] def line_to_edges(self, nr: int): """Convert one line to multiple edges. 1->2,3,4 :param nr: line number :return: edge (namespace: tail, head, weight) :see: self._to_edges """ return self._to_edges(self.line(nr))
[docs] def lines_to_edges(self, start: int = 0, stop: int = None): """Retrun a list of edges for range of lines. 1->2 # simple edge 1->2:22 # weighted edge 1->2,3,4 # muliple edges per line If stop is not given all remaining lines are used. :param start: :param stop: :return: list of edges (namespace: tail, head, weight) :see: self._to_edges """ edges = [] for line in self.lines_to_list(start, stop): edges += self._to_edges(line) return edges
[docs] def lines_to_graph(self, start: int = 0, stop: int = None): """Retrun a graph for range of lines If stop is not given all remaining lines are usee. Formats: 1->2 # simple edge 1->2:22 # weighted edge 1->2,3,4 # muliple edges per line Properties: graph.edges: dict, tails as keys and list of heads as values graph.weights: dict, pairs of tail, head as keys and weight as value :param start: :param stop: :return: graph, namespace with graphs properties :see: self._to_edges """ graph = types.SimpleNamespace() graph.edges = defaultdict(list) graph.weights = dict() edges = [] nodes = [] for line in self.lines_to_list(start, stop): edges += self._to_edges(line) for edge in edges: nodes.append(edge.head) nodes.append(edge.tail) graph.edges[edge.tail].append(edge.head) try: graph.weights[(edge.tail, edge.head)] = edge.weight except AttributeError: pass graph.nodes = sorted(set(nodes)) graph.edge_count = len(edges) graph.node_count = len(graph.nodes) return graph
[docs] def edges(self, start: int = 0, stop: int = None): """Generator to read edges from lines. !!! DEPRECATED !!! use lines_to_edges() Reads a range of lines, one edge per line, and yields the edges. By the start and stop parameters a range can be given. The stop parameter is the index behind the last line to use. The line to start is set by the parameter start. It defaults to zero. The line to stop is set by the parameter stop. When it is not provided lines are used as long as they match the edge_pattern reg expression. The match behaviour can be adjusted by the self.edge_pattern. """ def _to_edge(match): edge = types.SimpleNamespace() edge.tail = int(match.group(1)) edge.head = int(match.group(2)) if match.group(4): edge.weight = int(match.group(4)) return edge if stop is None: stop = math.inf nr = start while nr < stop: try: line = self.line(nr) except IndexError: break match = re.compile(self.edge_pattern).match(line) if match: yield (_to_edge(match)) nr += 1 else: break # If edges end before stop, which may be infinity
[docs] def fasta(self, start: int = 0, stop: int = None): """Generator to read FASTA formatted samples. Reads multiple fasta sequences and yields them. By the start and stop parameters a range can be given. The stop parameter is the index behind the last line to use. The line to start is set by the parameter start. It defaults to zero. The line to stop is set by the parameter stop. When it is not provided lines are used as long as they match the FASTA format. The match behaviour can be adjusted by the self.fasta_pattern. """ name, sequence = '', '' if stop is None: stop = math.inf nr = start while nr < stop: try: line = self.line(nr) except IndexError: break if line.startswith('>'): if name != '' and sequence != '': # Yield previous sequence if any yield name, sequence name, sequence = line[1:], '' # Reset else: match = re.compile(self.fasta_pattern).match(line) if match: sequence += line else: break # If edges end before stop, which may be infinity nr += 1 # Yield final sequence yield name, sequence
[docs] def fasta_strands(self, start: int = 0, stop: int = None): """ Get the strands of a fasta read as list. Takes the same arguments as self.fasta() and delegates to it. """ return list(dict(self.fasta(start, stop)).values())
# -------------------------------------------------- # Formatting # -------------------------------------------------- # noinspection PyMethodMayBeStatic
[docs] def format_list_of_integers(self, integers: list, joint: str = ', '): """Join a list of integers to a string Use the given joint. """ return joint.join(str(x) for x in integers)
[docs] def format_path(self, integers: list, backwards: bool = False): """Join a list of integers to path of nodes. The joint is -> by default. If the parameter backwards is True the joint is <-. """ if backwards: joint = '<-' else: joint = '->' return self.format_list_of_integers(integers, joint)
[docs] def format_permutations(self, permutations: list, separator: str = '\n', element_separator: str = ' '): entries = [] for perm in permutations: entry = '(' entry += element_separator.join( ('+' if i > 0 else '') + str(i) for i in perm) entry += ')' entries.append(entry) return separator.join(entries)