"""Core module of challenges
This module holds the base class of all challenges.
"""
import re
import math
import types
from collections import defaultdict
[docs]class Challenge:
"""Base class of all challenges
Design concept is the Template Method Design Pattern (GOF).
Attributes:
:sample: The input of the challenge.
:output: The output of the challenge
Workflow:
The `main` method controls the overall workflow by calling the worker
methods. This is the common character of all challenges.
The base class controls the workflow of the derived workers.
Workers:
The worker methods need to be implemented by the inheriting class.
:read: Read the input into a list of lines.
:build: Build the data model from the lines.
:calc: Run the main algorithm of the challenge.
:format: Create the output string required by the grader.
Library:
The other methods support the implementation of the workers. They address
the extraction of data from the input lines or the formatting of the
output.
Sample:
The attribute `sample` is both used as class and as instance attribute.
When the instance attribute is injected it shadows the class attribute. By
this the class attribute sets a tiny but useful default.
When the challenge runner is executed with the option `--klass` no
instance variable is injected and the sample from the class is used::
prompt> challenge MyChallenge --klass
When the runner is executed with the option `--file` the files content is
injected::
prompt> challenge MyChallenge --file ~/Downloads/data.txt
"""
sample = '''
sample
sample
'''
"""Holds a minimal example of the input with additional whitespace.
This class variable should always be preset with a tiny sample of input.
Whitespace surrounding lines is for readability. It typically needs to be
stripped to get the actual sample.
"""
expect = '''
expected result
expected result
'''
"""Holds the expected result with additional leading whitespace.
Whitespace surrounding lines is for readability. It typically needs to be
stripped to get the actual expactation.
"""
br = '\n'
"""Line breaks as expected by the most graders."""
split_pattern = '\s+|\s?,\s?'
"""Reg expression to split input lines.
Used by some of the input parsing functions. The default splits by
whitespace and/or comma. If the input is separated differently like colons
or semicolons it needs adjustment in the inheriting class.
"""
edge_pattern = '^(\d+)->(\d+)(:(\d+))?$'
"""Reg expression to extract edges of a graph.
With or without weight.
2->3
2->3:22
A default setting used by methods that extract edges from input lines.
May need adjustment for different kind of edge input formats.
"""
multi_edge_pattern = '^(\d+)->(\d+(,?\s*\d+)*)$'
"""Reg expressen to extrct edges of a graph.
Multiple edges on one line.
2->3, 4, 5
A default setting used by methods that extract edges from input lines.
May need adjustment for different kind of edge input formats.
"""
fasta_pattern = '^[\-\*A-Z]+$'
"""Reg expression for FASTA sequences.
Matches lines holding FASTA sequences.
"""
def __init__(self):
self.lines = []
"""A list of lines that will be filled by the method read()."""
self.model = types.SimpleNamespace()
"""The imported data model.
A flexible namespace object to take up any kind of data. In simple
cases this may be completely overwritten, i.e by a list or dict.
"""
self.result = types.SimpleNamespace()
"""The resulting data model.
A flexible namespace object to take up any kind of data. In simple
cases this may be completely overwritten, i.e by a list or dict.
"""
self.output = ''
"""The output string.
The string representation of the resulting model as expected by the
grader.
"""
[docs] def main(self):
"""Control the workflow of the challenge.
Usually this method doesn't need to be overwritten.
The workers share data via instance variables.
The overall input is injected into self.sample.
The overall output is read from self.result.
"""
self.read()
self.build()
self.calc()
self.format()
# --------------------------------------------------
# Default and abstract workers
# --------------------------------------------------
[docs] def read(self):
"""Extract the input string self.sample into self.lines.
Typically this method can be used as is.
"""
self.lines = self.example().splitlines()
[docs] def build(self):
"""Set up the model from the input lines.
This method must be implemented.
Reads from self.lines.
Fills self.model.
"""
pass
[docs] def calc(self):
"""Main algorithm of the challenge.
This method must be implemented. Here the interesting stuff happens.
Best practice is to delegate to functions, that are named by the
algorithms used or even to other classes that implement the algorithm.
Reads from self.model.
Fills self.result.
"""
pass
# --------------------------------------------------
# Accessing example and expectation
# --------------------------------------------------
[docs] def example(self):
"""Get the sample, with heading whitespace trimmed"""
lines = self.sample.strip().splitlines()
return '\n'.join(line.strip() for line in lines)
[docs] def expectation(self):
"""Get the expecation, with heading whitespace trimmed"""
lines = self.expect.strip().splitlines()
return '\n'.join(line.strip() for line in lines)
# --------------------------------------------------
# Accessing input lines
# --------------------------------------------------
[docs] def line(self, nr: int):
""" Return one line by the given number.
:param nr: line number
:return: line as string
"""
return self.lines[nr]
[docs] def lines_to_list(self, start: int = 0, stop: int = None):
"""Return a list of lines.
If stop is not given all remaining lines are used.
:param start: index of first line
:param stop: index of line after last line
:return: list of lines
"""
if stop:
return self.lines[start:stop]
else:
return self.lines[start:]
def _to_words(self, line: str):
""" Split line into words
The split behaviour can be adjusted by changing self.split_pattern.
:param line: the string to split
:return: list of words
"""
return list(re.compile(self.split_pattern).split(line))
[docs] def line_to_words(self, nr: int):
""" Split one line into a list of words.
:param nr: line number
:return: list of words
:see: self._to_words()
"""
return self._to_words(self.line(nr))
[docs] def lines_to_words(self, start: int = 0, stop: int = None,
flatten: bool = False):
"""Split a range of lines into words.
If stop is not given all remaining lines are used.
:param start: index of first line
:param stop: index of line after last line
:param flatten: flatten to one dimensional list
:return: one or two dimensional list of words
:see: self._to_words()
"""
words = []
for line in self.lines_to_list(start, stop):
if flatten:
words += self._to_words(line)
else:
words.append(self._to_words(line))
return words
def _to_integers(self, line:str):
""" Split line into integers
The split behaviour can be adjusted by changing self.split_pattern.
:param line: the string to split
:return: list of integers
"""
return [int(i) for i in re.compile(self.split_pattern).split(line)]
[docs] def line_to_integer(self, nr: int):
""" Return line as integer.
:param nr: line number
:return: integer
"""
return int(self.line(nr))
[docs] def line_to_integers(self, nr: int):
""" Split one line into a list of integers.
:param nr: line number
:return: list of integers
:see: self._to_integers
"""
return self._to_integers(self.line(nr))
[docs] def lines_to_integers(self, start:int=0, stop=None, flatten=False):
"""Split a range of lines into integers
If stop is not given all remaining lines are used.
:param start: index of first line
:param stop: index of line after last line
:param flatten: flatten to one dimensional list
:return: one or two dimensional list of integers
:see: self._to_integers()
"""
integers = []
for line in self.lines_to_list(start, stop):
if flatten:
integers += self._to_integers(line)
else:
integers.append(self._to_integers(line))
return integers
def _to_floats(self, line:str):
""" Split line into floats
The split behaviour can be adjusted by changing self.split_pattern.
:param line: the string to split
:return: list of floats
"""
return [float(f) for f in re.compile(self.split_pattern).split(line)]
[docs] def line_to_float(self, nr: int):
""" Return line as float.
:param nr: line number
:return: float
"""
return float(self.line(nr))
[docs] def line_to_floats(self, nr: int):
""" Split one line into a list of floats.
:param nr: line number
:return: list of floats
:see: self._to_floats
"""
return self._to_floats(self.line(nr))
[docs] def lines_to_floats(self, start:int=0, stop=None, flatten=False):
"""Split a range of lines into floats
If stop is not given all remaining lines are used.
:param start: index of first line
:param stop: index of line after last line
:param flatten: flatten to one dimensional list
:return: one or two dimensional list of floats
:see: self._to_floats()
"""
floats = []
for line in self.lines_to_list(start, stop):
if flatten:
floats += self._to_floats(line)
else:
floats.append(self._to_floats(line))
return floats
[docs] def line_to_permutation(self, nr: int, terminals: bool = False):
"""Convert one line to a permutation
optionally surrounded by terminals
Example: (+1 -3, -2)
Result: (1, -3, 2)
If terminals is True: (0, 1, -3, 2, 4)
The number of the line is selected by nr.
Input may be surrounded by a pair of round parenthesis.
:param nr: line number
:param terminals: if True surrounded by 0 and n + 1
:return: permutation
"""
line = self.line(nr)
match = re.compile('^\((.*)\)$').match(line)
if match:
digits = match.group(1)
else:
digits = line
perm = [int(d) for d in re.compile(self.split_pattern).split(digits)]
if terminals:
perm = [0] + perm + [len(perm) + 1]
return tuple(perm)
[docs] def line_to_permutations(self, nr: int):
"""Convert one line to multiple permutations
Example: (+1 -3, -2)(+4 +5)
Result: [(1, -3, 2), (4, 5)]
The number of the line is selected by line_nr.
:param nr: line number
:return: list of permutations (tuples)
"""
matches = re.findall('\(([^)]*)\)', self.line(nr))
result = []
for digits in matches:
result.append(tuple(int(d) for d in re.compile(
self.split_pattern).split(digits)))
return result
# noinspection PyMethodMayBeStatic
def _to_edges(self, line:str):
"""Convert input string to edges.
Detects if the line in single edge or multi edge format.
Single edge formats:
tail->head
tail->head:weight
Multi edge formats:
tail->head, head, head
Edge is of type namespace:
edge.tail
edge.head
edge.weight if given
:param line: input string
:return: list of edge
"""
edges = []
match = re.compile(self.edge_pattern).match(line)
if match:
edge = types.SimpleNamespace()
edge.tail = int(match.group(1))
edge.head = int(match.group(2))
if match.group(4):
edge.weight = int(match.group(4))
edges.append(edge)
else:
match = re.compile(self.multi_edge_pattern).match(line)
if match:
tail = int(match.group(1))
rest = match.group(2)
heads = [int(i) for i in
re.compile(self.split_pattern).split(rest)]
for head in heads:
edge = types.SimpleNamespace()
edge.tail = tail
edge.head = head
edges.append(edge)
return edges
[docs] def line_to_edge(self, nr: int):
"""Convert one line to an edge.
:param nr: line number
:return: edge (namespace: tail, head, weight)
:see: self._to_edges
"""
return self._to_edges(self.line(nr))[0]
[docs] def line_to_edges(self, nr: int):
"""Convert one line to multiple edges.
1->2,3,4
:param nr: line number
:return: edge (namespace: tail, head, weight)
:see: self._to_edges
"""
return self._to_edges(self.line(nr))
[docs] def lines_to_edges(self, start: int = 0, stop: int = None):
"""Retrun a list of edges for range of lines.
1->2 # simple edge
1->2:22 # weighted edge
1->2,3,4 # muliple edges per line
If stop is not given all remaining lines are used.
:param start:
:param stop:
:return: list of edges (namespace: tail, head, weight)
:see: self._to_edges
"""
edges = []
for line in self.lines_to_list(start, stop):
edges += self._to_edges(line)
return edges
[docs] def lines_to_graph(self, start: int = 0, stop: int = None):
"""Retrun a graph for range of lines
If stop is not given all remaining lines are usee.
Formats:
1->2 # simple edge
1->2:22 # weighted edge
1->2,3,4 # muliple edges per line
Properties:
graph.edges:
dict, tails as keys and list of heads as values
graph.weights:
dict, pairs of tail, head as keys and weight as value
:param start:
:param stop:
:return: graph, namespace with graphs properties
:see: self._to_edges
"""
graph = types.SimpleNamespace()
graph.edges = defaultdict(list)
graph.weights = dict()
edges = []
nodes = []
for line in self.lines_to_list(start, stop):
edges += self._to_edges(line)
for edge in edges:
nodes.append(edge.head)
nodes.append(edge.tail)
graph.edges[edge.tail].append(edge.head)
try:
graph.weights[(edge.tail, edge.head)] = edge.weight
except AttributeError:
pass
graph.nodes = sorted(set(nodes))
graph.edge_count = len(edges)
graph.node_count = len(graph.nodes)
return graph
[docs] def edges(self, start: int = 0, stop: int = None):
"""Generator to read edges from lines.
!!! DEPRECATED !!! use lines_to_edges()
Reads a range of lines, one edge per line, and yields the edges.
By the start and stop parameters a range can be given.
The stop parameter is the index behind the last line to use.
The line to start is set by the parameter start. It defaults to zero.
The line to stop is set by the parameter stop. When it is not provided
lines are used as long as they match the edge_pattern reg expression.
The match behaviour can be adjusted by the self.edge_pattern.
"""
def _to_edge(match):
edge = types.SimpleNamespace()
edge.tail = int(match.group(1))
edge.head = int(match.group(2))
if match.group(4):
edge.weight = int(match.group(4))
return edge
if stop is None:
stop = math.inf
nr = start
while nr < stop:
try:
line = self.line(nr)
except IndexError:
break
match = re.compile(self.edge_pattern).match(line)
if match:
yield (_to_edge(match))
nr += 1
else:
break # If edges end before stop, which may be infinity
[docs] def fasta(self, start: int = 0, stop: int = None):
"""Generator to read FASTA formatted samples.
Reads multiple fasta sequences and yields them.
By the start and stop parameters a range can be given.
The stop parameter is the index behind the last line to use.
The line to start is set by the parameter start. It defaults to zero.
The line to stop is set by the parameter stop. When it is not provided
lines are used as long as they match the FASTA format.
The match behaviour can be adjusted by the self.fasta_pattern.
"""
name, sequence = '', ''
if stop is None:
stop = math.inf
nr = start
while nr < stop:
try:
line = self.line(nr)
except IndexError:
break
if line.startswith('>'):
if name != '' and sequence != '':
# Yield previous sequence if any
yield name, sequence
name, sequence = line[1:], '' # Reset
else:
match = re.compile(self.fasta_pattern).match(line)
if match:
sequence += line
else:
break # If edges end before stop, which may be infinity
nr += 1
# Yield final sequence
yield name, sequence
[docs] def fasta_strands(self, start: int = 0, stop: int = None):
""" Get the strands of a fasta read as list.
Takes the same arguments as self.fasta() and delegates to it.
"""
return list(dict(self.fasta(start, stop)).values())
# --------------------------------------------------
# Formatting
# --------------------------------------------------
# noinspection PyMethodMayBeStatic