X-Git-Url: http://www.git.cypherpunks.ru/?a=blobdiff_plain;f=pyderasn.py;h=1c7f3ed94603bda04df4aa52c38e8f2d293c0055;hb=afc0f9f65430bed928619c783373ae3c6a82be1b;hp=ac74adca037466893af82825115ca0c7bd13269e;hpb=f8f92c66af855c581d4b6e8b824da63f6987cc33;p=pyderasn.git diff --git a/pyderasn.py b/pyderasn.py index ac74adc..1c7f3ed 100755 --- a/pyderasn.py +++ b/pyderasn.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -# PyDERASN -- Python ASN.1 DER codec with abstract structures -# Copyright (C) 2017-2018 Sergey Matveev +# PyDERASN -- Python ASN.1 DER/BER codec with abstract structures +# Copyright (C) 2017-2019 Sergey Matveev # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as @@ -16,10 +16,10 @@ # You should have received a copy of the GNU Lesser General Public # License along with this program. If not, see # . -"""Python ASN.1 DER codec with abstract structures +"""Python ASN.1 DER/BER codec with abstract structures -This library allows you to marshal and unmarshal various structures in -ASN.1 DER format, like this: +This library allows you to marshal various structures in ASN.1 DER +format, unmarshal them in BER/CER/DER ones. >>> i = Integer(123) >>> raw = i.encode() @@ -189,11 +189,18 @@ use following properties: Pay attention that those values do **not** include anything related to explicit tag. If you want to know information about it, then use: -``expled`` (to know if explicit tag is set), ``expl_offset`` (it is -lesser than ``offset``), ``expl_tlen``, ``expl_llen``, ``expl_vlen`` -(that actually equals to ordinary ``tlvlen``). -When error occurs, then :py:exc:`pyderasn.DecodeError` is raised. +* ``expled`` -- to know if explicit tag is set +* ``expl_offset`` (it is lesser than ``offset``) +* ``expl_tlen``, +* ``expl_llen`` +* ``expl_vlen`` (that actually equals to ordinary ``tlvlen``) +* ``fulloffset`` -- it equals to ``expl_offset`` if explicit tag is set, + ``offset`` otherwise +* ``fulllen`` -- it equals to ``expl_len`` if explicit tag is set, + ``tlvlen`` otherwise + +When error occurs, :py:exc:`pyderasn.DecodeError` is raised. .. _ctx: @@ -206,8 +213,11 @@ decoding process. Currently available context options: +* :ref:`allow_default_values ` +* :ref:`allow_expl_oob ` +* :ref:`allow_unordered_set ` +* :ref:`bered ` * :ref:`defines_by_path ` -* :ref:`strict_default_existence ` .. _pprinting: @@ -266,7 +276,7 @@ You can specify multiple fields, that will be autodecoded -- that is why ``defines`` kwarg is a sequence. You can specify defined field relatively or absolutely to current decode path. For example ``defines`` for AlgorithmIdentifier of X.509's -``tbsCertificate.subjectPublicKeyInfo.algorithm.algorithm``:: +``tbsCertificate:subjectPublicKeyInfo:algorithm:algorithm``:: ( (("parameters",), { @@ -294,8 +304,7 @@ Following types can be automatically decoded (DEFINED BY): When any of those fields is automatically decoded, then ``.defined`` attribute contains ``(OID, value)`` tuple. ``OID`` tells by which OID it was defined, ``value`` contains corresponding decoded value. For example -above, ``content_info["content"].defined == (id_signedData, -signed_data)``. +above, ``content_info["content"].defined == (id_signedData, signed_data)``. .. _defines_by_path_ctx: @@ -363,6 +372,52 @@ First function is useful for path construction when some automatic decoding is already done. ``any`` means literally any value it meet -- useful for SEQUENCE/SET OF-s. +.. _bered_ctx: + +BER encoding +------------ + +By default PyDERASN accepts only DER encoded data. It always encodes to +DER. But you can optionally enable BER decoding with setting ``bered`` +:ref:`context ` argument to True. Indefinite lengths and +constructed primitive types should be parsed successfully. + +* If object is encoded in BER form (not the DER one), then ``ber_encoded`` + attribute is set to True. Only ``BOOLEAN``, ``BIT STRING``, ``OCTET + STRING``, ``OBJECT IDENTIFIER``, ``SEQUENCE``, ``SET``, ``SET OF`` + can contain it. +* If object has an indefinite length encoding, then its ``lenindef`` + attribute is set to True. Only ``BIT STRING``, ``OCTET STRING``, + ``SEQUENCE``, ``SET``, ``SEQUENCE OF``, ``SET OF``, ``ANY`` can + contain it. +* If object has an indefinite length encoded explicit tag, then + ``expl_lenindef`` is set to True. +* If object has either any of BER-related encoding (explicit tag + indefinite length, object's indefinite length, BER-encoding) or any + underlying component has that kind of encoding, then ``bered`` + attribute is set to True. For example SignedData CMS can have + ``ContentInfo:content:signerInfos:*`` ``bered`` value set to True, but + ``ContentInfo:content:signerInfos:*:signedAttrs`` won't. + +EOC (end-of-contents) token's length is taken in advance in object's +value length. + +.. _allow_expl_oob_ctx: + +Allow explicit tag out-of-bound +------------------------------- + +Invalid BER encoding could contain ``EXPLICIT`` tag containing more than +one value, more than one object. If you set ``allow_expl_oob`` context +option to True, then no error will be raised and that invalid encoding +will be silently further processed. But pay attention that offsets and +lengths will be invalid in that case. + +.. warning:: + + This option should be used only for skipping some decode errors, just + to see the decoded structure somehow. + Primitive types --------------- @@ -404,6 +459,10 @@ CommonString ____________ .. autoclass:: pyderasn.CommonString +NumericString +_____________ +.. autoclass:: pyderasn.NumericString + UTCTime _______ .. autoclass:: pyderasn.UTCTime @@ -457,6 +516,7 @@ Various ------- .. autofunction:: pyderasn.abs_decode_path +.. autofunction:: pyderasn.colonize_hex .. autofunction:: pyderasn.hexenc .. autofunction:: pyderasn.hexdec .. autofunction:: pyderasn.tag_encode @@ -464,15 +524,28 @@ Various .. autofunction:: pyderasn.tag_ctxp .. autofunction:: pyderasn.tag_ctxc .. autoclass:: pyderasn.Obj +.. autoclass:: pyderasn.DecodeError + :members: __init__ +.. autoclass:: pyderasn.NotEnoughData +.. autoclass:: pyderasn.LenIndefForm +.. autoclass:: pyderasn.TagMismatch +.. autoclass:: pyderasn.InvalidLength +.. autoclass:: pyderasn.InvalidOID +.. autoclass:: pyderasn.ObjUnknown +.. autoclass:: pyderasn.ObjNotReady +.. autoclass:: pyderasn.InvalidValueType +.. autoclass:: pyderasn.BoundsError """ from codecs import getdecoder from codecs import getencoder from collections import namedtuple from collections import OrderedDict +from copy import copy from datetime import datetime from math import ceil from os import environ +from string import ascii_letters from string import digits from six import add_metaclass @@ -485,12 +558,13 @@ from six import iterbytes from six import PY2 from six import string_types from six import text_type +from six import unichr as six_unichr from six.moves import xrange as six_xrange try: from termcolor import colored -except ImportError: +except ImportError: # pragma: no cover def colored(what, *args): return what @@ -516,6 +590,7 @@ __all__ = ( "InvalidOID", "InvalidValueType", "ISO646String", + "LenIndefForm", "NotEnoughData", "Null", "NumericString", @@ -563,13 +638,19 @@ TagClassReprs = { } EOC = b"\x00\x00" EOC_LEN = len(EOC) +LENINDEF = b"\x80" # length indefinite mark +LENINDEF_PP_CHAR = "I" if PY2 else "∞" ######################################################################## # Errors ######################################################################## -class DecodeError(Exception): +class ASN1Error(ValueError): + pass + + +class DecodeError(ASN1Error): def __init__(self, msg="", klass=None, decode_path=(), offset=0): """ :param str msg: reason of decode failing @@ -592,7 +673,7 @@ class DecodeError(Exception): c for c in ( "" if self.klass is None else self.klass.__name__, ( - ("(%s)" % ".".join(str(dp) for dp in self.decode_path)) + ("(%s)" % ":".join(str(dp) for dp in self.decode_path)) if len(self.decode_path) > 0 else "" ), ("(at %d)" % self.offset) if self.offset > 0 else "", @@ -624,7 +705,7 @@ class InvalidOID(DecodeError): pass -class ObjUnknown(ValueError): +class ObjUnknown(ASN1Error): def __init__(self, name): super(ObjUnknown, self).__init__() self.name = name @@ -636,7 +717,7 @@ class ObjUnknown(ValueError): return "%s(%s)" % (self.__class__.__name__, self) -class ObjNotReady(ValueError): +class ObjNotReady(ASN1Error): def __init__(self, name): super(ObjNotReady, self).__init__() self.name = name @@ -648,7 +729,7 @@ class ObjNotReady(ValueError): return "%s(%s)" % (self.__class__.__name__, self) -class InvalidValueType(ValueError): +class InvalidValueType(ASN1Error): def __init__(self, expected_types): super(InvalidValueType, self).__init__() self.expected_types = expected_types @@ -662,7 +743,7 @@ class InvalidValueType(ValueError): return "%s(%s)" % (self.__class__.__name__, self) -class BoundsError(ValueError): +class BoundsError(ASN1Error): def __init__(self, bound_min, value, bound_max): super(BoundsError, self).__init__() self.bound_min = bound_min @@ -856,7 +937,7 @@ class Obj(object): "vlen", "expl_lenindef", "lenindef", - "bered", + "ber_encoded", ) def __init__( @@ -870,9 +951,7 @@ class Obj(object): self.tag = getattr(self, "impl", self.tag_default) if impl is None else impl self._expl = getattr(self, "expl", None) if expl is None else expl if self.tag != self.tag_default and self._expl is not None: - raise ValueError( - "implicit and explicit tags can not be set simultaneously" - ) + raise ValueError("implicit and explicit tags can not be set simultaneously") if default is not None: optional = True self.optional = optional @@ -880,7 +959,7 @@ class Obj(object): self.default = None self.expl_lenindef = False self.lenindef = False - self.bered = False + self.ber_encoded = False @property def ready(self): # pragma: no cover @@ -892,6 +971,12 @@ class Obj(object): if not self.ready: raise ObjNotReady(self.__class__.__name__) + @property + def bered(self): + """Is either object or any elements inside is BER encoded? + """ + return self.expl_lenindef or self.lenindef or self.ber_encoded + @property def decoded(self): """Is object decoded? @@ -946,6 +1031,7 @@ class Obj(object): decode_path=(), ctx=None, tag_only=False, + _ctx_immutable=True, ): """Decode the data @@ -953,14 +1039,17 @@ class Obj(object): :param int offset: initial data's offset :param bool leavemm: do we need to leave memoryview of remaining data as is, or convert it to bytes otherwise - :param ctx: optional :ref:`context ` governing decoding process. + :param ctx: optional :ref:`context ` governing decoding process :param tag_only: decode only the tag, without length and contents (used only in Choice and Set structures, trying to determine if tag satisfies the scheme) + :param _ctx_immutable: do we need to copy ``ctx`` before using it :returns: (Obj, remaining data) """ if ctx is None: ctx = {} + elif _ctx_immutable: + ctx = copy(ctx) tlv = memoryview(data) if self._expl is None: result = self._decode( @@ -1008,13 +1097,14 @@ class Obj(object): ctx=ctx, tag_only=tag_only, ) - if tag_only: + if tag_only: # pragma: no cover return obj, tail = result eoc_expected, tail = tail[:EOC_LEN], tail[EOC_LEN:] if eoc_expected.tobytes() != EOC: raise DecodeError( - msg="no EOC", + "no EOC", + klass=self.__class__, decode_path=decode_path, offset=offset, ) @@ -1042,9 +1132,16 @@ class Obj(object): ctx=ctx, tag_only=tag_only, ) - if tag_only: + if tag_only: # pragma: no cover return obj, tail = result + if obj.tlvlen < l and not ctx.get("allow_expl_oob", False): + raise DecodeError( + "explicit tag out-of-bound, longer than data", + klass=self.__class__, + decode_path=decode_path, + offset=offset, + ) return obj, (tail if leavemm else tail.tobytes()) @property @@ -1077,6 +1174,46 @@ class Obj(object): def expl_tlvlen(self): return self.expl_tlen + self.expl_llen + self.expl_vlen + @property + def fulloffset(self): + return self.expl_offset if self.expled else self.offset + + @property + def fulllen(self): + return self.expl_tlvlen if self.expled else self.tlvlen + + def pps_lenindef(self, decode_path): + if self.lenindef and not ( + getattr(self, "defined", None) is not None and + self.defined[1].lenindef + ): + yield _pp( + asn1_type_name="EOC", + obj_name="", + decode_path=decode_path, + offset=( + self.offset + self.tlvlen - + (EOC_LEN * 2 if self.expl_lenindef else EOC_LEN) + ), + tlen=1, + llen=1, + vlen=0, + ber_encoded=True, + bered=True, + ) + if self.expl_lenindef: + yield _pp( + asn1_type_name="EOC", + obj_name="EXPLICIT", + decode_path=decode_path, + offset=self.expl_offset + self.expl_tlvlen - EOC_LEN, + tlen=1, + llen=1, + vlen=0, + ber_encoded=True, + bered=True, + ) + class DecodePathDefBy(object): """DEFINED BY representation inside decode path @@ -1106,6 +1243,7 @@ class DecodePathDefBy(object): ######################################################################## PP = namedtuple("PP", ( + "obj", "asn1_type_name", "obj_name", "decode_path", @@ -1125,11 +1263,13 @@ PP = namedtuple("PP", ( "expl_vlen", "expl_lenindef", "lenindef", + "ber_encoded", "bered", )) def _pp( + obj=None, asn1_type_name="unknown", obj_name="unknown", decode_path=(), @@ -1149,9 +1289,11 @@ def _pp( expl_vlen=None, expl_lenindef=False, lenindef=False, + ber_encoded=False, bered=False, ): return PP( + obj, asn1_type_name, obj_name, decode_path, @@ -1171,48 +1313,57 @@ def _pp( expl_vlen, expl_lenindef, lenindef, + ber_encoded, bered, ) -def _colorize(what, colour, with_colours, attrs=("bold",)): +def _colourize(what, colour, with_colours, attrs=("bold",)): return colored(what, colour, attrs=attrs) if with_colours else what +def colonize_hex(hexed): + """Separate hexadecimal string with colons + """ + return ":".join(hexed[i:i + 2] for i in range(0, len(hexed), 2)) + + def pp_console_row( pp, oids=None, with_offsets=False, with_blob=True, with_colours=False, + with_decode_path=False, + decode_path_len_decrease=0, ): cols = [] if with_offsets: - col = "%5d%s" % ( + col = "%5d%s%s" % ( pp.offset, ( " " if pp.expl_offset is None else ("-%d" % (pp.offset - pp.expl_offset)) ), + LENINDEF_PP_CHAR if pp.expl_lenindef else " ", ) - cols.append(_colorize(col, "red", with_colours, ())) - col = "[%d,%d,%4d]" % (pp.tlen, pp.llen, pp.vlen) - col = _colorize(col, "green", with_colours, ()) - ber_deoffset = 0 - if pp.expl_lenindef: - ber_deoffset += 2 - if pp.lenindef: - ber_deoffset += 2 - col += ( - " " if ber_deoffset == 0 else - _colorize(("-%d" % ber_deoffset), "red", with_colours) + col = _colourize(col, "red", with_colours, ()) + col += _colourize("B", "red", with_colours) if pp.bered else " " + cols.append(col) + col = "[%d,%d,%4d]%s" % ( + pp.tlen, + pp.llen, + pp.vlen, + LENINDEF_PP_CHAR if pp.lenindef else " " ) + col = _colourize(col, "green", with_colours, ()) cols.append(col) - if len(pp.decode_path) > 0: - cols.append(" ." * (len(pp.decode_path))) + decode_path_len = len(pp.decode_path) - decode_path_len_decrease + if decode_path_len > 0: + cols.append(" ." * decode_path_len) ent = pp.decode_path[-1] if isinstance(ent, DecodePathDefBy): - cols.append(_colorize("DEFINED BY", "red", with_colours, ("reverse",))) + cols.append(_colourize("DEFINED BY", "red", with_colours, ("reverse",))) value = str(ent.defined_by) if ( oids is not None and @@ -1220,61 +1371,82 @@ def pp_console_row( ObjectIdentifier.asn1_type_name and value in oids ): - cols.append(_colorize("%s:" % oids[value], "green", with_colours)) + cols.append(_colourize("%s:" % oids[value], "green", with_colours)) else: - cols.append(_colorize("%s:" % value, "white", with_colours, ("reverse",))) + cols.append(_colourize("%s:" % value, "white", with_colours, ("reverse",))) else: - cols.append(_colorize("%s:" % ent, "yellow", with_colours, ("reverse",))) + cols.append(_colourize("%s:" % ent, "yellow", with_colours, ("reverse",))) if pp.expl is not None: klass, _, num = pp.expl col = "[%s%d] EXPLICIT" % (TagClassReprs[klass], num) - cols.append(_colorize(col, "blue", with_colours)) + cols.append(_colourize(col, "blue", with_colours)) if pp.impl is not None: klass, _, num = pp.impl col = "[%s%d]" % (TagClassReprs[klass], num) - cols.append(_colorize(col, "blue", with_colours)) + cols.append(_colourize(col, "blue", with_colours)) if pp.asn1_type_name.replace(" ", "") != pp.obj_name.upper(): - cols.append(_colorize(pp.obj_name, "magenta", with_colours)) - if pp.bered: - cols.append(_colorize("BER", "red", with_colours)) - cols.append(_colorize(pp.asn1_type_name, "cyan", with_colours)) + cols.append(_colourize(pp.obj_name, "magenta", with_colours)) + if pp.ber_encoded: + cols.append(_colourize("BER", "red", with_colours)) + cols.append(_colourize(pp.asn1_type_name, "cyan", with_colours)) if pp.value is not None: value = pp.value - cols.append(_colorize(value, "white", with_colours, ("reverse",))) + cols.append(_colourize(value, "white", with_colours, ("reverse",))) if ( oids is not None and pp.asn1_type_name == ObjectIdentifier.asn1_type_name and value in oids ): - cols.append(_colorize("(%s)" % oids[value], "green", with_colours)) + cols.append(_colourize("(%s)" % oids[value], "green", with_colours)) + if pp.asn1_type_name == Integer.asn1_type_name: + hex_repr = hex(int(pp.obj._value))[2:].upper() + if len(hex_repr) % 2 != 0: + hex_repr = "0" + hex_repr + cols.append(_colourize( + "(%s)" % colonize_hex(hex_repr), + "green", + with_colours, + )) if with_blob: if isinstance(pp.blob, binary_type): cols.append(hexenc(pp.blob)) elif isinstance(pp.blob, tuple): cols.append(", ".join(pp.blob)) if pp.optional: - cols.append(_colorize("OPTIONAL", "red", with_colours)) + cols.append(_colourize("OPTIONAL", "red", with_colours)) if pp.default: - cols.append(_colorize("DEFAULT", "red", with_colours)) + cols.append(_colourize("DEFAULT", "red", with_colours)) + if with_decode_path: + cols.append(_colourize( + "[%s]" % ":".join(str(p) for p in pp.decode_path), + "grey", + with_colours, + )) return " ".join(cols) -def pp_console_blob(pp): - cols = [" " * len("XXXXXYY [X,X,XXXX]YY")] - if len(pp.decode_path) > 0: - cols.append(" ." * (len(pp.decode_path) + 1)) +def pp_console_blob(pp, decode_path_len_decrease=0): + cols = [" " * len("XXXXXYYZZ [X,X,XXXX]Z")] + decode_path_len = len(pp.decode_path) - decode_path_len_decrease + if decode_path_len > 0: + cols.append(" ." * (decode_path_len + 1)) if isinstance(pp.blob, binary_type): blob = hexenc(pp.blob).upper() for i in range(0, len(blob), 32): chunk = blob[i:i + 32] - yield " ".join(cols + [":".join( - chunk[j:j + 2] for j in range(0, len(chunk), 2) - )]) + yield " ".join(cols + [colonize_hex(chunk)]) elif isinstance(pp.blob, tuple): yield " ".join(cols + [", ".join(pp.blob)]) -def pprint(obj, oids=None, big_blobs=False, with_colours=False): +def pprint( + obj, + oids=None, + big_blobs=False, + with_colours=False, + with_decode_path=False, + decode_path_only=(), +): """Pretty print object :param Obj obj: object you want to pretty print @@ -1285,10 +1457,19 @@ def pprint(obj, oids=None, big_blobs=False, with_colours=False): lines :param with_colours: colourize output, if ``termcolor`` library is available + :param with_decode_path: print decode path + :param decode_path_only: print only that specified decode path """ def _pprint_pps(pps): for pp in pps: if hasattr(pp, "_fields"): + if ( + decode_path_only != () and + tuple( + str(p) for p in pp.decode_path[:len(decode_path_only)] + ) != decode_path_only + ): + continue if big_blobs: yield pp_console_row( pp, @@ -1296,8 +1477,13 @@ def pprint(obj, oids=None, big_blobs=False, with_colours=False): with_offsets=True, with_blob=False, with_colours=with_colours, + with_decode_path=with_decode_path, + decode_path_len_decrease=len(decode_path_only), ) - for row in pp_console_blob(pp): + for row in pp_console_blob( + pp, + decode_path_len_decrease=len(decode_path_only), + ): yield row else: yield pp_console_row( @@ -1306,6 +1492,8 @@ def pprint(obj, oids=None, big_blobs=False, with_colours=False): with_offsets=True, with_blob=True, with_colours=with_colours, + with_decode_path=with_decode_path, + decode_path_len_decrease=len(decode_path_only), ) else: for row in _pprint_pps(pp): @@ -1381,6 +1569,9 @@ class Boolean(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __nonzero__(self): @@ -1468,14 +1659,14 @@ class Boolean(Obj): offset=offset, ) first_octet = byte2int(v) - bered = False + ber_encoded = False if first_octet == 0: value = False elif first_octet == 0xFF: value = True elif ctx.get("bered", False): value = True - bered = True + ber_encoded = True else: raise DecodeError( "unacceptable Boolean value", @@ -1491,7 +1682,7 @@ class Boolean(Obj): optional=self.optional, _decoded=(offset, 1, 1), ) - obj.bered = bered + obj.ber_encoded = ber_encoded return obj, v[1:] def __repr__(self): @@ -1499,6 +1690,7 @@ class Boolean(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -1516,8 +1708,11 @@ class Boolean(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + ber_encoded=self.ber_encoded, bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class Integer(Obj): @@ -1633,6 +1828,9 @@ class Integer(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __int__(self): @@ -1823,6 +2021,7 @@ class Integer(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -1840,7 +2039,10 @@ class Integer(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class BitString(Obj): @@ -1883,6 +2085,14 @@ class BitString(Obj): ['nonRepudiation', 'keyEncipherment'] >>> b.specs {'nonRepudiation': 1, 'digitalSignature': 0, 'keyEncipherment': 2} + + .. note:: + + Pay attention that BIT STRING can be encoded both in primitive + and constructed forms. Decoder always checks constructed form tag + additionally to specified primitive one. If BER decoding is + :ref:`not enabled `, then decoder will fail, because + of DER restrictions. """ __slots__ = ("tag_constructed", "specs", "defined") tag_default = tag_encode(3) @@ -1958,9 +2168,7 @@ class BitString(Obj): len(value) * 4, hexdec(value + ("" if len(value) % 2 == 0 else "0")), ) - else: - raise InvalidValueType((self.__class__, string_types, binary_type)) - elif isinstance(value, binary_type): + if isinstance(value, binary_type): return (len(value) * 8, value) else: raise InvalidValueType((self.__class__, string_types, binary_type)) @@ -2003,6 +2211,9 @@ class BitString(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __iter__(self): @@ -2146,17 +2357,18 @@ class BitString(Obj): offset=offset, ) if t == self.tag: - if tag_only: + if tag_only: # pragma: no cover return return self._decode_chunk(lv, offset, decode_path, ctx) if t == self.tag_constructed: if not ctx.get("bered", False): raise DecodeError( - msg="unallowed BER constructed encoding", + "unallowed BER constructed encoding", + klass=self.__class__, decode_path=decode_path, offset=offset, ) - if tag_only: + if tag_only: # pragma: no cover return lenindef = False try: @@ -2171,7 +2383,7 @@ class BitString(Obj): decode_path=decode_path, offset=offset, ) - if l > 0 and l > len(v): + if l > len(v): raise NotEnoughData( "encoded length is longer than data", klass=self.__class__, @@ -2197,8 +2409,9 @@ class BitString(Obj): break if vlen > l: raise DecodeError( - msg="chunk out of bounds", - decode_path=len(chunks) - 1, + "chunk out of bounds", + klass=self.__class__, + decode_path=decode_path + (str(len(chunks) - 1),), offset=chunks[-1].offset, ) sub_decode_path = decode_path + (str(len(chunks)),) @@ -2209,10 +2422,12 @@ class BitString(Obj): decode_path=sub_decode_path, leavemm=True, ctx=ctx, + _ctx_immutable=False, ) except TagMismatch: raise DecodeError( - msg="expected BitString encoded chunk", + "expected BitString encoded chunk", + klass=self.__class__, decode_path=sub_decode_path, offset=sub_offset, ) @@ -2222,7 +2437,8 @@ class BitString(Obj): v = v_tail if len(chunks) == 0: raise DecodeError( - msg="no chunks", + "no chunks", + klass=self.__class__, decode_path=decode_path, offset=offset, ) @@ -2231,7 +2447,8 @@ class BitString(Obj): for chunk_i, chunk in enumerate(chunks[:-1]): if chunk.bit_len % 8 != 0: raise DecodeError( - msg="BitString chunk is not multiple of 8 bit", + "BitString chunk is not multiple of 8 bits", + klass=self.__class__, decode_path=decode_path + (str(chunk_i),), offset=chunk.offset, ) @@ -2250,7 +2467,7 @@ class BitString(Obj): _decoded=(offset, llen, vlen + (EOC_LEN if lenindef else 0)), ) obj.lenindef = lenindef - obj.bered = True + obj.ber_encoded = True return obj, (v[EOC_LEN:] if lenindef else v) raise TagMismatch( klass=self.__class__, @@ -2270,6 +2487,7 @@ class BitString(Obj): if len(self.specs) > 0: blob = tuple(self.named) yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -2289,6 +2507,7 @@ class BitString(Obj): expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, lenindef=self.lenindef, + ber_encoded=self.ber_encoded, bered=self.bered, ) defined_by, defined = self.defined or (None, None) @@ -2296,6 +2515,8 @@ class BitString(Obj): yield defined.pps( decode_path=decode_path + (DecodePathDefBy(defined_by),) ) + for pp in self.pps_lenindef(decode_path): + yield pp class OctetString(Obj): @@ -2313,6 +2534,14 @@ class OctetString(Obj): pyderasn.BoundsError: unsatisfied bounds: 4 <= 5 <= 4 >>> OctetString(b"hell", bounds=(4, 4)) OCTET STRING 4 bytes 68656c6c + + .. note:: + + Pay attention that OCTET STRING can be encoded both in primitive + and constructed forms. Decoder always checks constructed form tag + additionally to specified primitive one. If BER decoding is + :ref:`not enabled `, then decoder will fail, because + of DER restrictions. """ __slots__ = ("tag_constructed", "_bound_min", "_bound_max", "defined") tag_default = tag_encode(4) @@ -2397,6 +2626,9 @@ class OctetString(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __bytes__(self): @@ -2507,7 +2739,8 @@ class OctetString(Obj): if t == self.tag_constructed: if not ctx.get("bered", False): raise DecodeError( - msg="unallowed BER constructed encoding", + "unallowed BER constructed encoding", + klass=self.__class__, decode_path=decode_path, offset=offset, ) @@ -2526,20 +2759,13 @@ class OctetString(Obj): decode_path=decode_path, offset=offset, ) - if l > 0 and l > len(v): + if l > len(v): raise NotEnoughData( "encoded length is longer than data", klass=self.__class__, decode_path=decode_path, offset=offset, ) - if not lenindef and l == 0: - raise NotEnoughData( - "zero length", - klass=self.__class__, - decode_path=decode_path, - offset=offset, - ) chunks = [] sub_offset = offset + tlen + llen vlen = 0 @@ -2552,8 +2778,9 @@ class OctetString(Obj): break if vlen > l: raise DecodeError( - msg="chunk out of bounds", - decode_path=len(chunks) - 1, + "chunk out of bounds", + klass=self.__class__, + decode_path=decode_path + (str(len(chunks) - 1),), offset=chunks[-1].offset, ) sub_decode_path = decode_path + (str(len(chunks)),) @@ -2564,10 +2791,12 @@ class OctetString(Obj): decode_path=sub_decode_path, leavemm=True, ctx=ctx, + _ctx_immutable=False, ) except TagMismatch: raise DecodeError( - msg="expected OctetString encoded chunk", + "expected OctetString encoded chunk", + klass=self.__class__, decode_path=sub_decode_path, offset=sub_offset, ) @@ -2575,12 +2804,6 @@ class OctetString(Obj): sub_offset += chunk.tlvlen vlen += chunk.tlvlen v = v_tail - if len(chunks) == 0: - raise DecodeError( - msg="no chunks", - decode_path=decode_path, - offset=offset, - ) try: obj = self.__class__( value=b"".join(bytes(chunk) for chunk in chunks), @@ -2606,7 +2829,7 @@ class OctetString(Obj): offset=offset, ) obj.lenindef = lenindef - obj.bered = True + obj.ber_encoded = True return obj, (v[EOC_LEN:] if lenindef else v) raise TagMismatch( klass=self.__class__, @@ -2619,6 +2842,7 @@ class OctetString(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -2638,6 +2862,7 @@ class OctetString(Obj): expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, lenindef=self.lenindef, + ber_encoded=self.ber_encoded, bered=self.bered, ) defined_by, defined = self.defined or (None, None) @@ -2645,6 +2870,8 @@ class OctetString(Obj): yield defined.pps( decode_path=decode_path + (DecodePathDefBy(defined_by),) ) + for pp in self.pps_lenindef(decode_path): + yield pp class Null(Obj): @@ -2688,6 +2915,9 @@ class Null(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __eq__(self, their): @@ -2730,7 +2960,7 @@ class Null(Obj): decode_path=decode_path, offset=offset, ) - if tag_only: + if tag_only: # pragma: no cover return try: l, _, v = len_decode(lv) @@ -2761,6 +2991,7 @@ class Null(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -2776,7 +3007,10 @@ class Null(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class ObjectIdentifier(Obj): @@ -2894,6 +3128,9 @@ class ObjectIdentifier(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __iter__(self): @@ -2978,7 +3215,7 @@ class ObjectIdentifier(Obj): decode_path=decode_path, offset=offset, ) - if tag_only: + if tag_only: # pragma: no cover return try: l, llen, v = len_decode(lv) @@ -3005,11 +3242,17 @@ class ObjectIdentifier(Obj): ) v, tail = v[:l], v[l:] arcs = [] + ber_encoded = False while len(v) > 0: i = 0 arc = 0 while True: octet = indexbytes(v, i) + if i == 0 and octet == 0x80: + if ctx.get("bered", False): + ber_encoded = True + else: + raise DecodeError("non normalized arc encoding") arc = (arc << 7) | (octet & 0x7F) if octet & 0x80 == 0: arcs.append(arc) @@ -3041,6 +3284,8 @@ class ObjectIdentifier(Obj): optional=self.optional, _decoded=(offset, llen, l), ) + if ber_encoded: + obj.ber_encoded = True return obj, tail def __repr__(self): @@ -3048,6 +3293,7 @@ class ObjectIdentifier(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -3065,7 +3311,11 @@ class ObjectIdentifier(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + ber_encoded=self.ber_encoded, + bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class Enumerated(Integer): @@ -3130,6 +3380,9 @@ class Enumerated(Integer): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __call__( @@ -3271,6 +3524,7 @@ class CommonString(OctetString): if self.ready: value = hexenc(bytes(self)) if no_unicode else self.__unicode__() yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -3288,7 +3542,11 @@ class CommonString(OctetString): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + ber_encoded=self.ber_encoded, + bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class UTF8String(CommonString): @@ -3298,25 +3556,57 @@ class UTF8String(CommonString): asn1_type_name = "UTF8String" -class NumericString(CommonString): +class AllowableCharsMixin(object): + @property + def allowable_chars(self): + if PY2: + return self._allowable_chars + return set(six_unichr(c) for c in self._allowable_chars) + + +class NumericString(AllowableCharsMixin, CommonString): + """Numeric string + + Its value is properly sanitized: only ASCII digits with spaces can + be stored. + + >>> NumericString().allowable_chars + set(['3', '4', '7', '5', '1', '0', '8', '9', ' ', '6', '2']) + """ __slots__ = () tag_default = tag_encode(18) encoding = "ascii" asn1_type_name = "NumericString" - allowable_chars = set(digits.encode("ascii")) + _allowable_chars = set(digits.encode("ascii") + b" ") def _value_sanitize(self, value): value = super(NumericString, self)._value_sanitize(value) - if not set(value) <= self.allowable_chars: + if not set(value) <= self._allowable_chars: raise DecodeError("non-numeric value") return value -class PrintableString(CommonString): +class PrintableString(AllowableCharsMixin, CommonString): + """Printable string + + Its value is properly sanitized: see X.680 41.4 table 10. + + >>> PrintableString().allowable_chars + >>> set([' ', "'", ..., 'z']) + """ __slots__ = () tag_default = tag_encode(19) encoding = "ascii" asn1_type_name = "PrintableString" + _allowable_chars = set( + (ascii_letters + digits + " '()+,-./:=?").encode("ascii") + ) + + def _value_sanitize(self, value): + value = super(PrintableString, self)._value_sanitize(value) + if not set(value) <= self._allowable_chars: + raise DecodeError("non-printable value") + return value class TeletexString(CommonString): @@ -3415,11 +3705,14 @@ class UTCTime(CommonString): if isinstance(value, datetime): return value.strftime(self.fmt).encode("ascii") if isinstance(value, binary_type): - value_decoded = value.decode("ascii") + try: + value_decoded = value.decode("ascii") + except (UnicodeEncodeError, UnicodeDecodeError) as err: + raise DecodeError("invalid UTCTime encoding") if len(value_decoded) == LEN_YYMMDDHHMMSSZ: try: datetime.strptime(value_decoded, self.fmt) - except ValueError: + except (TypeError, ValueError): raise DecodeError("invalid UTCTime format") return value else: @@ -3464,6 +3757,7 @@ class UTCTime(CommonString): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -3481,7 +3775,11 @@ class UTCTime(CommonString): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, + ber_encoded=self.ber_encoded, + bered=self.bered, ) + for pp in self.pps_lenindef(decode_path): + yield pp class GeneralizedTime(UTCTime): @@ -3511,11 +3809,14 @@ class GeneralizedTime(UTCTime): self.fmt_ms if value.microsecond > 0 else self.fmt ).encode("ascii") if isinstance(value, binary_type): - value_decoded = value.decode("ascii") + try: + value_decoded = value.decode("ascii") + except (UnicodeEncodeError, UnicodeDecodeError) as err: + raise DecodeError("invalid GeneralizedTime encoding") if len(value_decoded) == LEN_YYYYMMDDHHMMSSZ: try: datetime.strptime(value_decoded, self.fmt) - except ValueError: + except (TypeError, ValueError): raise DecodeError( "invalid GeneralizedTime (without ms) format", ) @@ -3523,7 +3824,7 @@ class GeneralizedTime(UTCTime): elif len(value_decoded) >= LEN_YYYYMMDDHHMMSSDMZ: try: datetime.strptime(value_decoded, self.fmt_ms) - except ValueError: + except (TypeError, ValueError): raise DecodeError( "invalid GeneralizedTime (with ms) format", ) @@ -3674,6 +3975,13 @@ class Choice(Obj): def ready(self): return self._value is not None and self._value[1].ready + @property + def bered(self): + return self.expl_lenindef or ( + (self._value is not None) and + self._value[1].bered + ) + def copy(self): obj = self.__class__(schema=self.specs) obj._expl = self._expl @@ -3682,6 +3990,9 @@ class Choice(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded value = self._value if value is not None: obj._value = (value[0], value[1].copy()) @@ -3763,6 +4074,7 @@ class Choice(Obj): decode_path=sub_decode_path, ctx=ctx, tag_only=True, + _ctx_immutable=False, ) except TagMismatch: continue @@ -3773,7 +4085,7 @@ class Choice(Obj): decode_path=decode_path, offset=offset, ) - if tag_only: + if tag_only: # pragma: no cover return value, tail = spec.decode( tlv, @@ -3781,13 +4093,14 @@ class Choice(Obj): leavemm=True, decode_path=sub_decode_path, ctx=ctx, + _ctx_immutable=False, ) obj = self.__class__( schema=self.specs, expl=self._expl, default=self.default, optional=self.optional, - _decoded=(offset, 0, value.tlvlen), + _decoded=(offset, 0, value.fulllen), ) obj._value = (choice, value) return obj, tail @@ -3800,6 +4113,7 @@ class Choice(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -3813,9 +4127,12 @@ class Choice(Obj): llen=self.llen, vlen=self.vlen, expl_lenindef=self.expl_lenindef, + bered=self.bered, ) if self.ready: yield self.value.pps(decode_path=decode_path + (self.choice,)) + for pp in self.pps_lenindef(decode_path): + yield pp class PrimitiveTypes(Choice): @@ -3899,6 +4216,14 @@ class Any(Obj): def ready(self): return self._value is not None + @property + def bered(self): + if self.expl_lenindef or self.lenindef: + return True + if self.defined is None: + return False + return self.defined[1].bered + def copy(self): obj = self.__class__() obj._value = self._value @@ -3908,6 +4233,9 @@ class Any(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded return obj def __eq__(self, their): @@ -3964,29 +4292,28 @@ class Any(Obj): llen, vlen, v = 1, 0, lv[1:] sub_offset = offset + tlen + llen chunk_i = 0 - while True: - if v[:EOC_LEN].tobytes() == EOC: - tlvlen = tlen + llen + vlen + EOC_LEN - obj = self.__class__( - value=tlv[:tlvlen].tobytes(), - expl=self._expl, - optional=self.optional, - _decoded=(offset, 0, tlvlen), - ) - obj.lenindef = True - obj.tag = t - return obj, v[EOC_LEN:] - else: - chunk, v = Any().decode( - v, - offset=sub_offset, - decode_path=decode_path + (str(chunk_i),), - leavemm=True, - ctx=ctx, - ) - vlen += chunk.tlvlen - sub_offset += chunk.tlvlen - chunk_i += 1 + while v[:EOC_LEN].tobytes() != EOC: + chunk, v = Any().decode( + v, + offset=sub_offset, + decode_path=decode_path + (str(chunk_i),), + leavemm=True, + ctx=ctx, + _ctx_immutable=False, + ) + vlen += chunk.tlvlen + sub_offset += chunk.tlvlen + chunk_i += 1 + tlvlen = tlen + llen + vlen + EOC_LEN + obj = self.__class__( + value=tlv[:tlvlen].tobytes(), + expl=self._expl, + optional=self.optional, + _decoded=(offset, 0, tlvlen), + ) + obj.lenindef = True + obj.tag = t + return obj, v[EOC_LEN:] except DecodeError as err: raise err.__class__( msg=err.msg, @@ -4017,6 +4344,7 @@ class Any(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -4035,12 +4363,15 @@ class Any(Obj): expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, lenindef=self.lenindef, + bered=self.bered, ) defined_by, defined = self.defined or (None, None) if defined_by is not None: yield defined.pps( decode_path=decode_path + (DecodePathDefBy(defined_by),) ) + for pp in self.pps_lenindef(decode_path): + yield pp ######################################################################## @@ -4063,8 +4394,7 @@ def get_def_by_path(defines_by_path, sub_decode_path): def abs_decode_path(decode_path, rel_path): """Create an absolute decode path from current and relative ones - :param decode_path: current decode path, starting point. - Tuple of strings + :param decode_path: current decode path, starting point. Tuple of strings :param rel_path: relative path to ``decode_path``. Tuple of strings. If first tuple's element is "/", then treat it as an absolute path, ignoring ``decode_path`` as @@ -4144,7 +4474,7 @@ class Sequence(Obj): ("algorithm", ObjectIdentifier("1.2.3")), ("parameters", Any(Null())) )) - AlgorithmIdentifier SEQUENCE[OBJECT IDENTIFIER 1.2.3, ANY 0500 OPTIONAL] + AlgorithmIdentifier SEQUENCE[algorithm: OBJECT IDENTIFIER 1.2.3; parameters: ANY 0500 OPTIONAL] You can determine if value exists/set in the sequence and take its value: @@ -4165,18 +4495,14 @@ class Sequence(Obj): All defaulted values are always optional. - .. _strict_default_existence_ctx: - - .. warning:: + .. _allow_default_values_ctx: - When decoded DER contains defaulted value inside, then - technically this is not valid DER encoding. But we allow and pass - it **by default**. Of course reencoding of that kind of DER will - result in different binary representation (validly without - defaulted value inside). You can enable strict defaulted values - existence validation by setting ``"strict_default_existence": - True`` :ref:`context ` option -- decoding process will raise - an exception if defaulted value is met. + DER prohibits default value encoding and will raise an error if + default value is unexpectedly met during decode. + If :ref:`bered ` context option is set, then no error + will be raised, but ``bered`` attribute set. You can disable strict + defaulted values existence validation by setting + ``"allow_default_values": True`` :ref:`context ` option. Two sequences are equal if they have equal specification (schema), implicit/explicit tagging and the same values. @@ -4234,6 +4560,12 @@ class Sequence(Obj): return False return True + @property + def bered(self): + if self.expl_lenindef or self.lenindef or self.ber_encoded: + return True + return any(value.bered for value in self._value.values()) + def copy(self): obj = self.__class__(schema=self.specs) obj.tag = self.tag @@ -4243,6 +4575,9 @@ class Sequence(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded obj._value = {k: v.copy() for k, v in self._value.items()} return obj @@ -4333,13 +4668,14 @@ class Sequence(Obj): decode_path=decode_path, offset=offset, ) - if tag_only: + if tag_only: # pragma: no cover return lenindef = False + ctx_bered = ctx.get("bered", False) try: l, llen, v = len_decode(lv) except LenIndefForm as err: - if not ctx.get("bered", False): + if not ctx_bered: raise err.__class__( msg=err.msg, klass=self.__class__, @@ -4367,6 +4703,8 @@ class Sequence(Obj): vlen = 0 sub_offset = offset + tlen + llen values = {} + ber_encoded = False + ctx_allow_default_values = ctx.get("allow_default_values", False) for name, spec in self.specs.items(): if spec.optional and ( (lenindef and v[:EOC_LEN].tobytes() == EOC) or @@ -4381,13 +4719,14 @@ class Sequence(Obj): leavemm=True, decode_path=sub_decode_path, ctx=ctx, + _ctx_immutable=False, ) except TagMismatch: if spec.optional: continue raise - defined = get_def_by_path(ctx.get("defines", ()), sub_decode_path) + defined = get_def_by_path(ctx.get("_defines", ()), sub_decode_path) if defined is not None: defined_by, defined_spec = defined if issubclass(value.__class__, SequenceOf): @@ -4405,6 +4744,7 @@ class Sequence(Obj): leavemm=True, decode_path=sub_sub_decode_path, ctx=ctx, + _ctx_immutable=False, ) if len(defined_tail) > 0: raise DecodeError( @@ -4424,6 +4764,7 @@ class Sequence(Obj): leavemm=True, decode_path=sub_decode_path + (DecodePathDefBy(defined_by),), ctx=ctx, + _ctx_immutable=False, ) if len(defined_tail) > 0: raise DecodeError( @@ -4434,20 +4775,20 @@ class Sequence(Obj): ) value.defined = (defined_by, defined_value) - value_len = value.expl_tlvlen if value.expled else value.tlvlen + value_len = value.fulllen vlen += value_len sub_offset += value_len v = v_tail if spec.default is not None and value == spec.default: - if ctx.get("strict_default_existence", False): + if ctx_bered or ctx_allow_default_values: + ber_encoded = True + else: raise DecodeError( "DEFAULT value met", klass=self.__class__, decode_path=sub_decode_path, offset=sub_offset, ) - else: - continue values[name] = value spec_defines = getattr(spec, "defines", ()) @@ -4459,7 +4800,7 @@ class Sequence(Obj): for rel_path, schema in spec_defines: defined = schema.get(value, None) if defined is not None: - ctx.setdefault("defines", []).append(( + ctx.setdefault("_defines", []).append(( abs_decode_path(sub_decode_path[:-1], rel_path), (value, defined), )) @@ -4490,6 +4831,7 @@ class Sequence(Obj): ) obj._value = values obj.lenindef = lenindef + obj.ber_encoded = ber_encoded return obj, tail def __repr__(self): @@ -4499,11 +4841,12 @@ class Sequence(Obj): _value = self._value.get(name) if _value is None: continue - cols.append(repr(_value)) - return "%s[%s]" % (value, ", ".join(cols)) + cols.append("%s: %s" % (name, repr(_value))) + return "%s[%s]" % (value, "; ".join(cols)) def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -4521,18 +4864,30 @@ class Sequence(Obj): expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, lenindef=self.lenindef, + ber_encoded=self.ber_encoded, + bered=self.bered, ) for name in self.specs: value = self._value.get(name) if value is None: continue yield value.pps(decode_path=decode_path + (name,)) + for pp in self.pps_lenindef(decode_path): + yield pp class Set(Sequence): """``SET`` structure type Its usage is identical to :py:class:`pyderasn.Sequence`. + + .. _allow_unordered_set_ctx: + + DER prohibits unordered values encoding and will raise an error + during decode. If If :ref:`bered ` context option is set, + then no error will occure. Also you can disable strict values + ordering check by setting ``"allow_unordered_set": True`` + :ref:`context ` option. """ __slots__ = () tag_default = tag_encode(form=TagFormConstructed, num=17) @@ -4563,10 +4918,11 @@ class Set(Sequence): if tag_only: return lenindef = False + ctx_bered = ctx.get("bered", False) try: l, llen, v = len_decode(lv) except LenIndefForm as err: - if not ctx.get("bered", False): + if not ctx_bered: raise err.__class__( msg=err.msg, klass=self.__class__, @@ -4593,6 +4949,10 @@ class Set(Sequence): vlen = 0 sub_offset = offset + tlen + llen values = {} + ber_encoded = False + ctx_allow_default_values = ctx.get("allow_default_values", False) + ctx_allow_unordered_set = ctx.get("allow_unordered_set", False) + value_prev = memoryview(v[:0]) specs_items = self.specs.items while len(v) > 0: if lenindef and v[:EOC_LEN].tobytes() == EOC: @@ -4607,6 +4967,7 @@ class Set(Sequence): decode_path=sub_decode_path, ctx=ctx, tag_only=True, + _ctx_immutable=False, ) except TagMismatch: continue @@ -4623,14 +4984,35 @@ class Set(Sequence): leavemm=True, decode_path=sub_decode_path, ctx=ctx, + _ctx_immutable=False, ) - value_len = value.expl_tlvlen if value.expled else value.tlvlen + value_len = value.fulllen + if value_prev.tobytes() > v[:value_len].tobytes(): + if ctx_bered or ctx_allow_unordered_set: + ber_encoded = True + else: + raise DecodeError( + "unordered " + self.asn1_type_name, + klass=self.__class__, + decode_path=sub_decode_path, + offset=sub_offset, + ) + if spec.default is None or value != spec.default: + pass + elif ctx_bered or ctx_allow_default_values: + ber_encoded = True + else: + raise DecodeError( + "DEFAULT value met", + klass=self.__class__, + decode_path=sub_decode_path, + offset=sub_offset, + ) + values[name] = value + value_prev = v[:value_len] sub_offset += value_len vlen += value_len v = v_tail - if spec.default is None or value != spec.default: # pragma: no cover - # SeqMixing.test_encoded_default_accepted covers that place - values[name] = value obj = self.__class__( schema=self.specs, impl=self.tag, @@ -4639,16 +5021,26 @@ class Set(Sequence): optional=self.optional, _decoded=(offset, llen, vlen + (EOC_LEN if lenindef else 0)), ) + if lenindef: + if v[:EOC_LEN].tobytes() != EOC: + raise DecodeError( + "no EOC", + klass=self.__class__, + decode_path=decode_path, + offset=offset, + ) + tail = v[EOC_LEN:] + obj.lenindef = True obj._value = values if not obj.ready: raise DecodeError( - msg="not all values are ready", + "not all values are ready", klass=self.__class__, decode_path=decode_path, offset=offset, ) - obj.lenindef = lenindef - return obj, (v[EOC_LEN:] if lenindef else tail) + obj.ber_encoded = ber_encoded + return obj, tail class SequenceOf(Obj): @@ -4746,6 +5138,12 @@ class SequenceOf(Obj): def ready(self): return all(v.ready for v in self._value) + @property + def bered(self): + if self.expl_lenindef or self.lenindef or self.ber_encoded: + return True + return any(v.bered for v in self._value) + def copy(self): obj = self.__class__(schema=self.spec) obj._bound_min = self._bound_min @@ -4757,6 +5155,9 @@ class SequenceOf(Obj): obj.offset = self.offset obj.llen = self.llen obj.vlen = self.vlen + obj.expl_lenindef = self.expl_lenindef + obj.lenindef = self.lenindef + obj.ber_encoded = self.ber_encoded obj._value = [v.copy() for v in self._value] return obj @@ -4831,7 +5232,7 @@ class SequenceOf(Obj): v = b"".join(self._encoded_values()) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset, decode_path, ctx, tag_only): + def _decode(self, tlv, offset, decode_path, ctx, tag_only, ordering_check=False): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -4850,10 +5251,11 @@ class SequenceOf(Obj): if tag_only: return lenindef = False + ctx_bered = ctx.get("bered", False) try: l, llen, v = len_decode(lv) except LenIndefForm as err: - if not ctx.get("bered", False): + if not ctx_bered: raise err.__class__( msg=err.msg, klass=self.__class__, @@ -4881,34 +5283,69 @@ class SequenceOf(Obj): vlen = 0 sub_offset = offset + tlen + llen _value = [] + ctx_allow_unordered_set = ctx.get("allow_unordered_set", False) + value_prev = memoryview(v[:0]) + ber_encoded = False spec = self.spec while len(v) > 0: if lenindef and v[:EOC_LEN].tobytes() == EOC: break + sub_decode_path = decode_path + (str(len(_value)),) value, v_tail = spec.decode( v, sub_offset, leavemm=True, - decode_path=decode_path + (str(len(_value)),), + decode_path=sub_decode_path, ctx=ctx, + _ctx_immutable=False, ) - value_len = value.expl_tlvlen if value.expled else value.tlvlen + value_len = value.fulllen + if ordering_check: + if value_prev.tobytes() > v[:value_len].tobytes(): + if ctx_bered or ctx_allow_unordered_set: + ber_encoded = True + else: + raise DecodeError( + "unordered " + self.asn1_type_name, + klass=self.__class__, + decode_path=sub_decode_path, + offset=sub_offset, + ) + value_prev = v[:value_len] + _value.append(value) sub_offset += value_len vlen += value_len v = v_tail - _value.append(value) - obj = self.__class__( - value=_value, - schema=spec, - bounds=(self._bound_min, self._bound_max), - impl=self.tag, - expl=self._expl, - default=self.default, - optional=self.optional, - _decoded=(offset, llen, vlen), - ) - obj.lenindef = lenindef - return obj, (v[EOC_LEN:] if lenindef else tail) + try: + obj = self.__class__( + value=_value, + schema=spec, + bounds=(self._bound_min, self._bound_max), + impl=self.tag, + expl=self._expl, + default=self.default, + optional=self.optional, + _decoded=(offset, llen, vlen + (EOC_LEN if lenindef else 0)), + ) + except BoundsError as err: + raise DecodeError( + msg=str(err), + klass=self.__class__, + decode_path=decode_path, + offset=offset, + ) + if lenindef: + if v[:EOC_LEN].tobytes() != EOC: + raise DecodeError( + "no EOC", + klass=self.__class__, + decode_path=decode_path, + offset=offset, + ) + obj.lenindef = True + tail = v[EOC_LEN:] + obj.ber_encoded = ber_encoded + return obj, tail def __repr__(self): return "%s[%s]" % ( @@ -4918,6 +5355,7 @@ class SequenceOf(Obj): def pps(self, decode_path=()): yield _pp( + obj=self, asn1_type_name=self.asn1_type_name, obj_name=self.__class__.__name__, decode_path=decode_path, @@ -4935,9 +5373,13 @@ class SequenceOf(Obj): expl_vlen=self.expl_vlen if self.expled else None, expl_lenindef=self.expl_lenindef, lenindef=self.lenindef, + ber_encoded=self.ber_encoded, + bered=self.bered, ) for i, value in enumerate(self._value): yield value.pps(decode_path=decode_path + (str(i),)) + for pp in self.pps_lenindef(decode_path): + yield pp class SetOf(SequenceOf): @@ -4955,6 +5397,16 @@ class SetOf(SequenceOf): v = b"".join(raws) return b"".join((self.tag, len_encode(len(v)), v)) + def _decode(self, tlv, offset, decode_path, ctx, tag_only): + return super(SetOf, self)._decode( + tlv, + offset, + decode_path, + ctx, + tag_only, + ordering_check=True, + ) + def obj_by_path(pypath): # pragma: no cover """Import object specified as string Python path @@ -4991,10 +5443,21 @@ def generic_decoder(): # pragma: no cover __slots__ = () schema = choice - def pprint_any(obj, oids=None, with_colours=False): + def pprint_any( + obj, + oids=None, + with_colours=False, + with_decode_path=False, + decode_path_only=(), + ): def _pprint_pps(pps): for pp in pps: if hasattr(pp, "_fields"): + if ( + decode_path_only != () and + pp.decode_path[:len(decode_path_only)] != decode_path_only + ): + continue if pp.asn1_type_name == Choice.asn1_type_name: continue pp_kwargs = pp._asdict() @@ -5006,8 +5469,13 @@ def generic_decoder(): # pragma: no cover with_offsets=True, with_blob=False, with_colours=with_colours, + with_decode_path=with_decode_path, + decode_path_len_decrease=len(decode_path_only), ) - for row in pp_console_blob(pp): + for row in pp_console_blob( + pp, + decode_path_len_decrease=len(decode_path_only), + ): yield row else: for row in _pprint_pps(pp): @@ -5018,7 +5486,7 @@ def generic_decoder(): # pragma: no cover def main(): # pragma: no cover import argparse - parser = argparse.ArgumentParser(description="PyDERASN ASN.1 DER decoder") + parser = argparse.ArgumentParser(description="PyDERASN ASN.1 BER/DER decoder") parser.add_argument( "--skip", type=int, @@ -5037,6 +5505,25 @@ def main(): # pragma: no cover "--defines-by-path", help="Python path to decoder's defines_by_path", ) + parser.add_argument( + "--nobered", + action="store_true", + help="Disallow BER encoding", + ) + parser.add_argument( + "--print-decode-path", + action="store_true", + help="Print decode paths", + ) + parser.add_argument( + "--decode-path-only", + help="Print only specified decode path", + ) + parser.add_argument( + "--allow-expl-oob", + action="store_true", + help="Allow explicit tag out-of-bound", + ) parser.add_argument( "DERFile", type=argparse.FileType("rb"), @@ -5053,7 +5540,10 @@ def main(): # pragma: no cover pprinter = partial(pprint, big_blobs=True) else: schema, pprinter = generic_decoder() - ctx = {"bered": True} + ctx = { + "bered": not args.nobered, + "allow_expl_oob": args.allow_expl_oob, + } if args.defines_by_path is not None: ctx["defines_by_path"] = obj_by_path(args.defines_by_path) obj, tail = schema().decode(der, ctx=ctx) @@ -5061,6 +5551,11 @@ def main(): # pragma: no cover obj, oids=oids, with_colours=True if environ.get("NO_COLOR") is None else False, + with_decode_path=args.print_decode_path, + decode_path_only=( + () if args.decode_path_only is None else + tuple(args.decode_path_only.split(":")) + ), )) if tail != b"": print("\nTrailing data: %s" % hexenc(tail))