Source code for ete4.parser.nexus

"""
Read trees from a file in nexus format.
"""

# See https://en.wikipedia.org/wiki/Nexus_file

import re

from . import newick as newick_parser


class NexusError(Exception):
    """Error raised when the nexus text being read is not acceptable."""
def load(fp, parser=None):
    """Read everything from the file object fp and return loads() of it.

    The parser argument is forwarded unchanged to loads().
    """
    text = fp.read()
    return loads(text, parser=parser)
def loads(text, parser=None):
    """Return {name: tree} for the trees found in the given nexus text.

    The newick strings come from get_trees(), and each one is converted
    with newick_parser.loads() (the same parser is used in both steps).
    """
    newicks = get_trees(text, parser=parser)

    trees = {}
    for tree_name, tree_newick in newicks.items():
        trees[tree_name] = newick_parser.loads(tree_newick, parser=parser)

    return trees
def get_trees(text, parser=None):
    """Return trees as {name: newick} with all the name transformations done.

    Args:
        text: Full contents of a nexus file. It must begin with a
            "#NEXUS" line (case-insensitive).
        parser: Passed through to apply_translations().

    Returns:
        Dict that maps the name of each TREE command in the TREES section
        to its newick string, with the names replaced according to the
        TRANSLATE command (if there is one).

    Raises:
        NexusError: If text does not start with "#NEXUS", or if the TREES
            section contains more than one TRANSLATE command.
    """
    if not re.match(r'^#NEXUS\s*\n', text, flags=re.I):
        raise NexusError('text does not start with "#NEXUS"')

    commands = get_section(text, 'TREES')

    translate = {}
    if 'TRANSLATE' in commands:
        if len(commands['TRANSLATE']) != 1:
            raise NexusError('multiple TRANSLATE commands')

        for pair in commands['TRANSLATE'][0].split(','):
            if not pair.strip():
                continue  # tolerate a trailing (or doubled) comma

            key, value = pair.split(maxsplit=1)

            # split(maxsplit=1) keeps any trailing whitespace in the last
            # piece, so "1 name ," would otherwise translate to "name ".
            translate[key] = value.rstrip()

    trees = {}
    for command in commands.get('TREE', []):
        name_ugly, newick_ugly = command.split('=', maxsplit=1)

        name = name_ugly.strip('\t\r\n "\'')

        # The command terminator was consumed when splitting commands.
        newick = newick_ugly.strip() + ';'
        if newick.startswith('['):  # remove possible [&U] or comment
            newick = newick[newick.find(']')+1:].strip()

        trees[name] = apply_translations(translate, newick, parser)

    return trees
def apply_translations(translate, newick, parser=None):
    """Return newick with node names translated according to the given dict.

    With an empty (or otherwise falsy) translate, newick is returned as
    given, without being parsed. Otherwise it is read with
    newick_parser.loads(), every node obtained by iterating over the
    resulting tree whose name is a key of translate gets renamed, and the
    tree is written back with newick_parser.dumps().
    """
    if translate:
        tree = newick_parser.loads(newick, parser=parser)

        for node in tree:
            old_name = node.name
            if old_name in translate:
                node.name = translate[old_name]

        return newick_parser.dumps(tree, parser=parser)

    return newick
def get_section(text, section_name):
    """Return commands ({name: [args]}) that correspond to the given section.

    The section is looked up by its upper-cased name in the result of
    get_sections(text). If it is not there, an empty dict is returned.
    """
    sections = get_sections(text)
    wanted = section_name.upper()
    return sections.get(wanted, {})
def get_sections(text):
    """Return {section: commands} read from the full text of a nexus file.

    A section is whatever lies between a line starting with "BEGIN <name>;"
    and the next line starting with "END;" (matched case-insensitively).
    Section names are upper-cased, and their contents are converted with
    get_commands(). If a name appears more than once, the last one wins.
    """
    section_re = re.compile(r'\nBEGIN\s+(\w+)\s*;(.*?)\nEND\s*;',
                            flags=re.I | re.S)

    return {found.group(1).upper(): get_commands(found.group(2))
            for found in section_re.finditer(text)}
def get_commands(text_section):
    """Return a dict that for each command has a list with its arguments.

    Args:
        text_section: Text inside a nexus block, made of commands that
            each end with ";".

    Returns:
        Dict {NAME: [args, ...]} where NAME is the upper-cased first word
        of a command, and there is one args string per occurrence of that
        command (in order of appearance). Commands that have no arguments
        (like "mcmc;") are recorded with '' as their args. Empty
        statements (";;") are ignored, and so is any text after the last
        ";".
    """
    # Each match is a ";" plus everything up to (not including) the next
    # ";". That is why we prepend one: so the first command matches too.
    pattern = r';.*?(?=;)'

    commands = {}
    for m in re.finditer(pattern, ';' + text_section, flags=re.S):
        parts = m.group().strip(';\r\n\t ').split(maxsplit=1)

        if not parts:
            continue  # empty statement, nothing to record

        name = parts[0]
        args = parts[1] if len(parts) > 1 else ''  # command without args

        commands.setdefault(name.upper(), []).append(args)

    return commands