X-Git-Url: http://www.git.cypherpunks.ru/?p=pyderasn.git;a=blobdiff_plain;f=pyderasn.py;h=e69b19056ce010fa0ea09e4a111fd68964c34c95;hp=5c311e09c11d2c39eed450a0d2f768bd195f1bea;hb=3bdec8f765e0fa8ed27162a8c0abe391a44dc664;hpb=524e7468d461f5803a01f228fc144b6ec901736d diff --git a/pyderasn.py b/pyderasn.py index 5c311e0..e69b190 100755 --- a/pyderasn.py +++ b/pyderasn.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 # PyDERASN -- Python ASN.1 DER codec with abstract structures -# Copyright (C) 2017 Sergey Matveev +# Copyright (C) 2017-2018 Sergey Matveev # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as @@ -68,7 +68,7 @@ ____ Most types in ASN.1 has specific tag for them. ``Obj.tag_default`` is the default tag used during coding process. You can override it with either ``IMPLICIT`` (using ``impl`` keyword argument), or -``EXPLICIT`` one (using ``expl`` keyword argument). Both arguments takes +``EXPLICIT`` one (using ``expl`` keyword argument). Both arguments take raw binary string, containing that tag. You can **not** set implicit and explicit tags simultaneously. @@ -88,10 +88,10 @@ number. Pay attention that explicit tags always have *constructed* tag Implicit tag is not explicitly shown. -Two object of the same type, but with different implicit/explicit tags +Two objects of the same type, but with different implicit/explicit tags are **not** equal. -You can get objects effective tag (either default or implicited) through +You can get object's effective tag (either default or implicited) through ``tag`` property. You can decode it using :py:func:`pyderasn.tag_decode` function:: @@ -135,6 +135,8 @@ example ``TBSCertificate`` sequence holds defaulted, explicitly tagged When default argument is used and value is not specified, then it equals to default one. +.. _bounds: + Size constraints ________________ @@ -157,15 +159,17 @@ raised. 
Common methods ______________ -All objects have ``ready`` boolean property, that tells if it is ready -to be encoded. If that kind of action is performed on unready object, -then :py:exc:`pyderasn.ObjNotReady` exception will be raised. +All objects have ``ready`` boolean property, that tells if object is +ready to be encoded. If that kind of action is performed on unready +object, then :py:exc:`pyderasn.ObjNotReady` exception will be raised. + +All objects have ``copy()`` method, that returns their copy, that can be +safely mutated. -All objects have ``copy()`` method, returning its copy, that can be safely -mutated. +.. _decoding: Decoding -________ +-------- Decoding is performed using ``decode()`` method. ``offset`` optional argument could be used to set initial object's offset in the binary @@ -177,7 +181,7 @@ by specifying ``leavemm=True`` argument. When object is decoded, ``decoded`` property is true and you can safely use following properties: -* ``offset`` -- position from initial offset where object's tag is started +* ``offset`` -- position including initial offset where object's tag starts * ``tlen`` -- length of object's tag * ``llen`` -- length of object's length value * ``vlen`` -- length of object's value @@ -191,8 +195,24 @@ lesser than ``offset``), ``expl_tlen``, ``expl_llen``, ``expl_vlen`` When error occurs, then :py:exc:`pyderasn.DecodeError` is raised. +.. _ctx: + +Context +_______ + +You can specify so-called context keyword argument during ``decode()`` +invocation. It is a dictionary containing various options governing +decoding process. + +Currently available context options: + +* :ref:`defines_by_path <defines_by_path_ctx>` +* :ref:`strict_default_existence <strict_default_existence_ctx>` + +.. _pprinting: + Pretty printing -_______________ +--------------- All objects have ``pps()`` method, that is a generator of :py:class:`pyderasn.PP` namedtuple, holding various raw information @@ -209,6 +229,140 @@ all object ``repr``. But it is easy to write custom formatters. 
>>> print(pprint(obj)) 0 [1,1, 2] INTEGER -12345 +.. _definedby: + +DEFINED BY +---------- + +ASN.1 structures often have ANY and OCTET STRING fields, that are +DEFINED BY some previously met ObjectIdentifier. This library provides +ability to specify mapping between some OID and field that must be +decoded with specific specification. + +defines kwarg +_____________ + +:py:class:`pyderasn.ObjectIdentifier` field inside +:py:class:`pyderasn.Sequence` can hold mapping between OIDs and +necessary for decoding structures. For example, CMS (:rfc:`5652`) +container:: + + class ContentInfo(Sequence): + schema = ( + ("contentType", ContentType(defines=((("content",), { + id_digestedData: DigestedData(), + id_signedData: SignedData(), + }),))), + ("content", Any(expl=tag_ctxc(0))), + ) + +``contentType`` field tells that it defines that ``content`` must be +decoded with ``SignedData`` specification, if ``contentType`` equals to +``id-signedData``. The same applies to ``DigestedData``. If +``contentType`` contains unknown OID, then no automatic decoding is +done. + +You can specify multiple fields, that will be autodecoded -- that is why +``defines`` kwarg is a sequence. You can specify defined field +relatively or absolutely to current decode path. For example ``defines`` +for AlgorithmIdentifier of X.509's +``tbsCertificate.subjectPublicKeyInfo.algorithm.algorithm``:: + + ( + (('parameters',), { + id_ecPublicKey: ECParameters(), + id_GostR3410_2001: GostR34102001PublicKeyParameters(), + }), + (('..', 'subjectPublicKey'), { + id_rsaEncryption: RSAPublicKey(), + id_GostR3410_2001: OctetString(), + }), + ), + +tells that if certificate's SPKI algorithm is GOST R 34.10-2001, then +autodecode its parameters inside SPKI's algorithm and its public key +itself. 
+ +Following types can be automatically decoded (DEFINED BY): + +* :py:class:`pyderasn.Any` +* :py:class:`pyderasn.BitString` (that is multiple of 8 bits) +* :py:class:`pyderasn.OctetString` +* :py:class:`pyderasn.SequenceOf`/:py:class:`pyderasn.SetOf` + ``Any``/``OctetString``-s + +When any of those fields is automatically decoded, then ``.defined`` +attribute contains ``(OID, value)`` tuple. ``OID`` tells by which OID it +was defined, ``value`` contains corresponding decoded value. For example +above, ``content_info["content"].defined == (id_signedData, +signed_data)``. + +.. _defines_by_path_ctx: + +defines_by_path context option +______________________________ + +Sometimes you either can not or do not want to explicitly set *defines* +in the scheme. You can dynamically apply those definitions when calling +``.decode()`` method. + +Specify ``defines_by_path`` key in the :ref:`decode context <ctx>`. Its +value must be a sequence of the following tuples:: + + (decode_path, defines) + +where ``decode_path`` is a tuple holding so-called decode path to the +exact :py:class:`pyderasn.ObjectIdentifier` field you want to apply +``defines``, holding exactly the same value as accepted in its keyword +argument. + +For example, again for CMS, you want to automatically decode +``SignedData`` and CMC's (:rfc:`5272`) ``PKIData`` and ``PKIResponse`` +structures it may hold. 
Also, automatically decode ``controlSequence`` +of ``PKIResponse``:: + + content_info, tail = ContentInfo().decode(data, defines_by_path=( + ( + ("contentType",), + ((("content",), {id_signedData: SignedData()}),), + ), + ( + ( + "content", + DecodePathDefBy(id_signedData), + "encapContentInfo", + "eContentType", + ), + ((("eContent",), { + id_cct_PKIData: PKIData(), + id_cct_PKIResponse: PKIResponse(), + })), + ), + ( + ( + "content", + DecodePathDefBy(id_signedData), + "encapContentInfo", + "eContent", + DecodePathDefBy(id_cct_PKIResponse), + "controlSequence", + any, + "attrType", + ), + ((("attrValues",), { + id_cmc_recipientNonce: RecipientNonce(), + id_cmc_senderNonce: SenderNonce(), + id_cmc_statusInfoV2: CMCStatusInfoV2(), + id_cmc_transactionId: TransactionId(), + })), + ), + )) + +Pay attention to :py:class:`pyderasn.DecodePathDefBy` and ``any``. +First function is useful for path construction when some automatic +decoding is already done. ``any`` means literally any value it meets -- +useful for SEQUENCE/SET OF-s. + Primitive types --------------- @@ -302,6 +456,7 @@ _____ Various ------- +.. autofunction:: pyderasn.abs_decode_path .. autofunction:: pyderasn.hexenc .. autofunction:: pyderasn.hexdec .. 
autofunction:: pyderasn.tag_encode @@ -331,6 +486,13 @@ from six import text_type from six.moves import xrange as six_xrange +try: + from termcolor import colored +except ImportError: + def colored(what, *args): + return what + + __all__ = ( "Any", "BitString", @@ -339,6 +501,7 @@ __all__ = ( "BoundsError", "Choice", "DecodeError", + "DecodePathDefBy", "Enumerated", "GeneralizedTime", "GeneralString", @@ -657,9 +820,9 @@ def len_decode(data): ######################################################################## class AutoAddSlots(type): - def __new__(cls, name, bases, _dict): + def __new__(mcs, name, bases, _dict): _dict["__slots__"] = _dict.get("__slots__", ()) - return type.__new__(cls, name, bases, _dict) + return type.__new__(mcs, name, bases, _dict) @add_metaclass(AutoAddSlots) @@ -688,10 +851,7 @@ class Obj(object): optional=False, _decoded=(0, 0, 0), ): - if impl is None: - self.tag = getattr(self, "impl", self.tag_default) - else: - self.tag = impl + self.tag = getattr(self, "impl", self.tag_default) if impl is None else impl self._expl = getattr(self, "expl", None) if expl is None else expl if self.tag != self.tag_default and self._expl is not None: raise ValueError( @@ -735,10 +895,22 @@ class Obj(object): def __str__(self): # pragma: no cover return self.__bytes__() if PY2 else self.__unicode__() + def __ne__(self, their): + return not(self == their) + + def __gt__(self, their): # pragma: no cover + return not(self < their) + + def __le__(self, their): # pragma: no cover + return (self == their) or (self < their) + + def __ge__(self, their): # pragma: no cover + return (self == their) or (self > their) + def _encode(self): # pragma: no cover raise NotImplementedError() - def _decode(self, tlv, offset=0, decode_path=()): # pragma: no cover + def _decode(self, tlv, offset, decode_path, ctx): # pragma: no cover raise NotImplementedError() def encode(self): @@ -747,21 +919,25 @@ class Obj(object): return raw return b"".join((self._expl, 
len_encode(len(raw)), raw)) - def decode(self, data, offset=0, leavemm=False, decode_path=()): + def decode(self, data, offset=0, leavemm=False, decode_path=(), ctx=None): """Decode the data :param data: either binary or memoryview :param int offset: initial data's offset :param bool leavemm: do we need to leave memoryview of remaining data as is, or convert it to bytes otherwise + :param ctx: optional :ref:`context ` governing decoding process. :returns: (Obj, remaining data) """ + if ctx is None: + ctx = {} tlv = memoryview(data) if self._expl is None: obj, tail = self._decode( tlv, offset, decode_path=decode_path, + ctx=ctx, ) else: try: @@ -798,7 +974,8 @@ class Obj(object): obj, tail = self._decode( v, offset=offset + tlen + llen, - decode_path=(), + decode_path=decode_path, + ctx=ctx, ) return obj, (tail if leavemm else tail.tobytes()) @@ -831,6 +1008,26 @@ class Obj(object): return self.expl_tlen + self.expl_llen + self.expl_vlen +class DecodePathDefBy(object): + """DEFINED BY representation inside decode path + """ + __slots__ = ('defined_by',) + + def __init__(self, defined_by): + self.defined_by = defined_by + + def __eq__(self, their): + if not isinstance(their, self.__class__): + return False + return self.defined_by == their.defined_by + + def __str__(self): + return "DEFINED BY " + str(self.defined_by) + + def __repr__(self): + return "<%s: %s>" % (self.__class__.__name__, self.defined_by) + + ######################################################################## # Pretty printing ######################################################################## @@ -896,49 +1093,75 @@ def _pp( ) -def pp_console_row(pp, oids=None, with_offsets=False, with_blob=True): +def _colorize(what, colour, with_colours, attrs=("bold",)): + return colored(what, colour, attrs=attrs) if with_colours else what + + +def pp_console_row( + pp, + oids=None, + with_offsets=False, + with_blob=True, + with_colours=False, +): cols = [] if with_offsets: - cols.append("%5d%s 
[%d,%d,%4d]" % ( + col = "%5d%s" % ( pp.offset, ( " " if pp.expl_offset is None else ("-%d" % (pp.offset - pp.expl_offset)) ), - pp.tlen, - pp.llen, - pp.vlen, - )) + ) + cols.append(_colorize(col, "red", with_colours, ())) + col = "[%d,%d,%4d]" % (pp.tlen, pp.llen, pp.vlen) + cols.append(_colorize(col, "green", with_colours, ())) if len(pp.decode_path) > 0: cols.append(" ." * (len(pp.decode_path))) - cols.append("%s:" % pp.decode_path[-1]) + ent = pp.decode_path[-1] + if isinstance(ent, DecodePathDefBy): + cols.append(_colorize("DEFINED BY", "red", with_colours, ("reverse",))) + value = str(ent.defined_by) + if ( + oids is not None and + ent.defined_by.asn1_type_name == + ObjectIdentifier.asn1_type_name and + value in oids + ): + cols.append(_colorize("%s:" % oids[value], "green", with_colours)) + else: + cols.append(_colorize("%s:" % value, "white", with_colours)) + else: + cols.append(_colorize("%s:" % ent, "yellow", with_colours)) if pp.expl is not None: klass, _, num = pp.expl - cols.append("[%s%d] EXPLICIT" % (TagClassReprs[klass], num)) + col = "[%s%d] EXPLICIT" % (TagClassReprs[klass], num) + cols.append(_colorize(col, "blue", with_colours)) if pp.impl is not None: klass, _, num = pp.impl - cols.append("[%s%d]" % (TagClassReprs[klass], num)) + col = "[%s%d]" % (TagClassReprs[klass], num) + cols.append(_colorize(col, "blue", with_colours)) if pp.asn1_type_name.replace(" ", "") != pp.obj_name.upper(): - cols.append(pp.obj_name) - cols.append(pp.asn1_type_name) + cols.append(_colorize(pp.obj_name, "magenta", with_colours)) + cols.append(_colorize(pp.asn1_type_name, "cyan", with_colours)) if pp.value is not None: value = pp.value + cols.append(_colorize(value, "white", with_colours)) if ( oids is not None and pp.asn1_type_name == ObjectIdentifier.asn1_type_name and value in oids ): - value = "%s (%s)" % (oids[value], pp.value) - cols.append(value) + cols.append(_colorize("(%s)" % oids[value], "green", with_colours)) if with_blob: if isinstance(pp.blob, 
binary_type): cols.append(hexenc(pp.blob)) elif isinstance(pp.blob, tuple): cols.append(", ".join(pp.blob)) if pp.optional: - cols.append("OPTIONAL") + cols.append(_colorize("OPTIONAL", "red", with_colours)) if pp.default: - cols.append("DEFAULT") + cols.append(_colorize("DEFAULT", "red", with_colours)) return " ".join(cols) @@ -957,7 +1180,7 @@ def pp_console_blob(pp): yield " ".join(cols + [", ".join(pp.blob)]) -def pprint(obj, oids=None, big_blobs=False): +def pprint(obj, oids=None, big_blobs=False, with_colours=False): """Pretty print object :param Obj obj: object you want to pretty print @@ -966,6 +1189,8 @@ def pprint(obj, oids=None, big_blobs=False): :param big_blobs: if large binary objects are met (like OctetString values), do we need to print them too, on separate lines + :param with_colours: colourize output, if ``termcolor`` library + is available """ def _pprint_pps(pps): for pp in pps: @@ -976,11 +1201,18 @@ def pprint(obj, oids=None, big_blobs=False): oids=oids, with_offsets=True, with_blob=False, + with_colours=with_colours, ) for row in pp_console_blob(pp): yield row else: - yield pp_console_row(pp, oids=oids, with_offsets=True) + yield pp_console_row( + pp, + oids=oids, + with_offsets=True, + with_blob=True, + with_colours=with_colours, + ) else: for row in _pprint_pps(pp): yield row @@ -1100,7 +1332,7 @@ class Boolean(Obj): (b"\xFF" if self._value else b"\x00"), )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1249,14 +1481,11 @@ class Integer(Obj): self._value = value specs = getattr(self, "schema", {}) if _specs is None else _specs self.specs = specs if isinstance(specs, dict) else dict(specs) - if bounds is None: - self._bound_min, self._bound_max = getattr( - self, - "bounds", - (float("-inf"), float("+inf")), - ) - else: - self._bound_min, self._bound_max = bounds + self._bound_min, self._bound_max = getattr( + self, + 
"bounds", + (float("-inf"), float("+inf")), + ) if bounds is None else bounds if value is not None: self._value = self._value_sanitize(value) if default is not None: @@ -1327,10 +1556,7 @@ class Integer(Obj): ) def __lt__(self, their): - return self._value < their - - def __gt__(self, their): - return self._value > their + return self._value < their._value @property def named(self): @@ -1399,7 +1625,7 @@ class Integer(Obj): break return b"".join((self.tag, len_encode(len(octets)), octets)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1550,7 +1776,7 @@ class BitString(Obj): >>> b.specs {'nonRepudiation': 1, 'digitalSignature': 0, 'keyEncipherment': 2} """ - __slots__ = ("specs",) + __slots__ = ("specs", "defined") tag_default = tag_encode(3) asn1_type_name = "BIT STRING" @@ -1587,6 +1813,7 @@ class BitString(Obj): ) if value is None: self._value = default + self.defined = None def _bits2octets(self, bits): if len(self.specs) > 0: @@ -1647,7 +1874,10 @@ class BitString(Obj): def copy(self): obj = self.__class__(_specs=self.specs) - obj._value = self._value + value = self._value + if value is not None: + value = (value[0], value[1]) + obj._value = value obj.tag = self.tag obj._expl = self._expl obj.default = self.default @@ -1729,7 +1959,7 @@ class BitString(Obj): octets, )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -1832,6 +2062,11 @@ class BitString(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, ) + defined_by, defined = self.defined or (None, None) + if defined_by is not None: + yield defined.pps( + decode_path=decode_path + (DecodePathDefBy(defined_by),) + ) class OctetString(Obj): @@ -1850,7 +2085,7 @@ class OctetString(Obj): >>> OctetString(b"hell", bounds=(4, 
4)) OCTET STRING 4 bytes 68656c6c """ - __slots__ = ("_bound_min", "_bound_max") + __slots__ = ("_bound_min", "_bound_max", "defined") tag_default = tag_encode(4) asn1_type_name = "OCTET STRING" @@ -1882,14 +2117,11 @@ class OctetString(Obj): _decoded, ) self._value = value - if bounds is None: - self._bound_min, self._bound_max = getattr( - self, - "bounds", - (0, float("+inf")), - ) - else: - self._bound_min, self._bound_max = bounds + self._bound_min, self._bound_max = getattr( + self, + "bounds", + (0, float("+inf")), + ) if bounds is None else bounds if value is not None: self._value = self._value_sanitize(value) if default is not None: @@ -1901,6 +2133,7 @@ class OctetString(Obj): ) if self._value is None: self._value = default + self.defined = None def _value_sanitize(self, value): if issubclass(value.__class__, OctetString): @@ -1946,6 +2179,9 @@ class OctetString(Obj): self._expl == their._expl ) + def __lt__(self, their): + return self._value < their._value + def __call__( self, value=None, @@ -1975,7 +2211,7 @@ class OctetString(Obj): self._value, )) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -2050,6 +2286,11 @@ class OctetString(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, ) + defined_by, defined = self.defined or (None, None) + if defined_by is not None: + yield defined.pps( + decode_path=decode_path + (DecodePathDefBy(defined_by),) + ) class Null(Obj): @@ -2119,7 +2360,7 @@ class Null(Obj): def _encode(self): return self.tag + len_encode(0) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -2199,13 +2440,14 @@ class ObjectIdentifier(Obj): Traceback (most recent call last): pyderasn.InvalidOID: unacceptable first arc value """ - __slots__ = () + __slots__ = 
("defines",) tag_default = tag_encode(6) asn1_type_name = "OBJECT IDENTIFIER" def __init__( self, value=None, + defines=(), impl=None, expl=None, default=None, @@ -2216,6 +2458,15 @@ class ObjectIdentifier(Obj): :param value: set the value. Either tuples of integers, string of "."-concatenated integers, or :py:class:`pyderasn.ObjectIdentifier` object + :param defines: sequence of tuples. Each tuple has two elements. + First one is relative to current one decode + path, aiming to the field defined by that OID. + Read about relative path in + :py:func:`pyderasn.abs_decode_path`. Second + tuple element is ``{OID: pyderasn.Obj()}`` + dictionary, mapping between current OID value + and structure applied to defined field. + :ref:`Read about DEFINED BY ` :param bytes impl: override default tag with ``IMPLICIT`` one :param bytes expl: override default tag with ``EXPLICIT`` one :param default: set default value. Type same as in ``value`` @@ -2240,6 +2491,7 @@ class ObjectIdentifier(Obj): ) if self._value is None: self._value = default + self.defines = defines def __add__(self, their): if isinstance(their, self.__class__): @@ -2277,6 +2529,7 @@ class ObjectIdentifier(Obj): def copy(self): obj = self.__class__() obj._value = self._value + obj.defines = self.defines obj.tag = self.tag obj._expl = self._expl obj.default = self.default @@ -2313,14 +2566,12 @@ class ObjectIdentifier(Obj): ) def __lt__(self, their): - return self._value < their - - def __gt__(self, their): - return self._value > their + return self._value < their._value def __call__( self, value=None, + defines=None, impl=None, expl=None, default=None, @@ -2328,6 +2579,7 @@ class ObjectIdentifier(Obj): ): return self.__class__( value=value, + defines=self.defines if defines is None else defines, impl=self.tag if impl is None else impl, expl=self._expl if expl is None else expl, default=self.default if default is None else default, @@ -2353,7 +2605,7 @@ class ObjectIdentifier(Obj): v = b"".join(octets) return 
b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, _, lv = tag_strip(tlv) except DecodeError as err: @@ -2718,6 +2970,11 @@ class IA5String(CommonString): asn1_type_name = "IA5" +LEN_YYMMDDHHMMSSZ = len("YYMMDDHHMMSSZ") +LEN_YYYYMMDDHHMMSSDMZ = len("YYYYMMDDHHMMSSDMZ") +LEN_YYYYMMDDHHMMSSZ = len("YYYYMMDDHHMMSSZ") + + class UTCTime(CommonString): """``UTCTime`` datetime type @@ -2784,7 +3041,7 @@ class UTCTime(CommonString): return value.strftime(self.fmt).encode("ascii") if isinstance(value, binary_type): value_decoded = value.decode("ascii") - if len(value_decoded) == 2 + 2 + 2 + 2 + 2 + 2 + 1: + if len(value_decoded) == LEN_YYMMDDHHMMSSZ: try: datetime.strptime(value_decoded, self.fmt) except ValueError: @@ -2875,7 +3132,7 @@ class GeneralizedTime(UTCTime): ).encode("ascii") if isinstance(value, binary_type): value_decoded = value.decode("ascii") - if len(value_decoded) == 4 + 2 + 2 + 2 + 2 + 2 + 1: + if len(value_decoded) == LEN_YYYYMMDDHHMMSSZ: try: datetime.strptime(value_decoded, self.fmt) except ValueError: @@ -2883,7 +3140,7 @@ class GeneralizedTime(UTCTime): "invalid GeneralizedTime (without ms) format", ) return value - elif len(value_decoded) >= 4 + 2 + 2 + 2 + 2 + 2 + 1 + 1 + 1: + elif len(value_decoded) >= LEN_YYYYMMDDHHMMSSDMZ: try: datetime.strptime(value_decoded, self.fmt_ms) except ValueError: @@ -2900,7 +3157,7 @@ class GeneralizedTime(UTCTime): def todatetime(self): value = self._value.decode("ascii") - if len(value) == 4 + 2 + 2 + 2 + 2 + 2 + 1: + if len(value) == LEN_YYYYMMDDHHMMSSZ: return datetime.strptime(value, self.fmt) return datetime.strptime(value, self.fmt_ms) @@ -3115,7 +3372,7 @@ class Choice(Obj): self._assert_ready() return self._value[1].encode() - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): for choice, spec in self.specs.items(): try: value, tail = spec.decode( @@ 
-3123,6 +3380,7 @@ class Choice(Obj): offset=offset, leavemm=True, decode_path=decode_path + (choice,), + ctx=ctx, ) except TagMismatch: continue @@ -3211,7 +3469,7 @@ class Any(Obj): >>> hexenc(bytes(a)) b'0x040x0bhello world' """ - __slots__ = () + __slots__ = ("defined",) tag_default = tag_encode(0) asn1_type_name = "ANY" @@ -3232,6 +3490,7 @@ class Any(Obj): """ super(Any, self).__init__(None, expl, None, optional, _decoded) self._value = None if value is None else self._value_sanitize(value) + self.defined = None def _value_sanitize(self, value): if isinstance(value, self.__class__): @@ -3288,7 +3547,7 @@ class Any(Obj): self._assert_ready() return self._value - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, tlen, lv = tag_strip(tlv) l, llen, v = len_decode(lv) @@ -3339,19 +3598,62 @@ class Any(Obj): expl_llen=self.expl_llen if self.expled else None, expl_vlen=self.expl_vlen if self.expled else None, ) + defined_by, defined = self.defined or (None, None) + if defined_by is not None: + yield defined.pps( + decode_path=decode_path + (DecodePathDefBy(defined_by),) + ) ######################################################################## # ASN.1 constructed types ######################################################################## +def get_def_by_path(defines_by_path, sub_decode_path): + """Get define by decode path + """ + for path, define in defines_by_path: + if len(path) != len(sub_decode_path): + continue + for p1, p2 in zip(path, sub_decode_path): + if (p1 != any) and (p1 != p2): + break + else: + return define + + +def abs_decode_path(decode_path, rel_path): + """Create an absolute decode path from current and relative ones + + :param decode_path: current decode path, starting point. + Tuple of strings + :param rel_path: relative path to ``decode_path``. Tuple of strings. + If first tuple's element is "/", then treat it as + an absolute path, ignoring ``decode_path`` as + starting point. 
Also this tuple can contain ".." + elements, stripping the last element from + ``decode_path`` + + >>> abs_decode_path(("foo", "bar"), ("baz", "whatever")) + ("foo", "bar", "baz", "whatever") + >>> abs_decode_path(("foo", "bar", "baz"), ("..", "..", "whatever")) + ("foo", "whatever") + >>> abs_decode_path(("foo", "bar"), ("/", "baz", "whatever")) + ("baz", "whatever") + """ + if rel_path[0] == "/": + return rel_path[1:] + if rel_path[0] == "..": + return abs_decode_path(decode_path[:-1], rel_path[1:]) + return decode_path + rel_path + + class Sequence(Obj): """``SEQUENCE`` structure type You have to make specification of sequence:: class Extension(Sequence): - __slots__ = () schema = ( ("extnID", ObjectIdentifier()), ("critical", Boolean(default=False)), @@ -3396,6 +3698,8 @@ class Sequence(Obj): >>> tbs = TBSCertificate() >>> tbs["version"] = Version("v2") # no need to explicitly add ``expl`` + Assign ``None`` to remove value from sequence. + You can know if value exists/set in the sequence and take its value: >>> "extnID" in ext, "extnValue" in ext, "critical" in ext @@ -3415,13 +3719,18 @@ class Sequence(Obj): All defaulted values are always optional. + .. _strict_default_existence_ctx: + .. warning:: When decoded DER contains defaulted value inside, then - technically this is not valid DER encoding. But we allow - and pass it. Of course reencoding of that kind of DER will + technically this is not valid DER encoding. But we allow and pass + it **by default**. Of course reencoding of that kind of DER will result in different binary representation (validly without - defaulted value inside). + defaulted value inside). You can enable strict defaulted values + existence validation by setting ``"strict_default_existence": + True`` :ref:`context <ctx>` option -- decoding process will raise + an exception if defaulted value is met. Two sequences are equal if they have equal specification (schema), implicit/explicit tagging and the same values. 
@@ -3559,7 +3868,7 @@ class Sequence(Obj): v = b"".join(self._encoded_values()) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -3597,24 +3906,94 @@ class Sequence(Obj): for name, spec in self.specs.items(): if len(v) == 0 and spec.optional: continue + sub_decode_path = decode_path + (name,) try: value, v_tail = spec.decode( v, sub_offset, leavemm=True, - decode_path=decode_path + (name,), + decode_path=sub_decode_path, + ctx=ctx, ) except TagMismatch: if spec.optional: continue raise + + defined = get_def_by_path(ctx.get("defines", ()), sub_decode_path) + if defined is not None: + defined_by, defined_spec = defined + if issubclass(value.__class__, SequenceOf): + for i, _value in enumerate(value): + sub_sub_decode_path = sub_decode_path + ( + str(i), + DecodePathDefBy(defined_by), + ) + defined_value, defined_tail = defined_spec.decode( + memoryview(bytes(_value)), + sub_offset + ( + (value.tlen + value.llen + value.expl_tlen + value.expl_llen) + if value.expled else (value.tlen + value.llen) + ), + leavemm=True, + decode_path=sub_sub_decode_path, + ctx=ctx, + ) + if len(defined_tail) > 0: + raise DecodeError( + "remaining data", + klass=self.__class__, + decode_path=sub_sub_decode_path, + offset=offset, + ) + _value.defined = (defined_by, defined_value) + else: + defined_value, defined_tail = defined_spec.decode( + memoryview(bytes(value)), + sub_offset + ( + (value.tlen + value.llen + value.expl_tlen + value.expl_llen) + if value.expled else (value.tlen + value.llen) + ), + leavemm=True, + decode_path=sub_decode_path + (DecodePathDefBy(defined_by),), + ctx=ctx, + ) + if len(defined_tail) > 0: + raise DecodeError( + "remaining data", + klass=self.__class__, + decode_path=sub_decode_path + (DecodePathDefBy(defined_by),), + offset=offset, + ) + value.defined = (defined_by, defined_value) + 
sub_offset += (value.expl_tlvlen if value.expled else value.tlvlen) v = v_tail if spec.default is not None and value == spec.default: - # Encoded default values are not valid in DER, - # but we still allow that - continue + if ctx.get("strict_default_existence", False): + raise DecodeError( + "DEFAULT value met", + klass=self.__class__, + decode_path=sub_decode_path, + offset=sub_offset, + ) + else: + continue values[name] = value + + spec_defines = getattr(spec, "defines", ()) + if len(spec_defines) == 0: + defines_by_path = ctx.get("defines_by_path", ()) + if len(defines_by_path) > 0: + spec_defines = get_def_by_path(defines_by_path, sub_decode_path) + if spec_defines is not None and len(spec_defines) > 0: + for rel_path, schema in spec_defines: + defined = schema.get(value, None) + if defined is not None: + ctx.setdefault("defines", []).append(( + abs_decode_path(sub_decode_path[:-1], rel_path), + (value, defined), + )) if len(v) > 0: raise DecodeError( "remaining data", @@ -3683,7 +4062,7 @@ class Set(Sequence): v = b"".join(raws) return b"".join((self.tag, len_encode(len(v)), v)) - def _decode(self, tlv, offset=0, decode_path=()): + def _decode(self, tlv, offset, decode_path, ctx): try: t, tlen, lv = tag_strip(tlv) except DecodeError as err: @@ -3726,6 +4105,7 @@ class Set(Sequence): sub_offset, leavemm=True, decode_path=decode_path + (name,), + ctx=ctx, ) except TagMismatch: continue @@ -3812,14 +4192,11 @@ class SequenceOf(Obj): if schema is None: raise ValueError("schema must be specified") self.spec = schema - if bounds is None: - self._bound_min, self._bound_max = getattr( - self, - "bounds", - (0, float("+inf")), - ) - else: - self._bound_min, self._bound_max = bounds + self._bound_min, self._bound_max = getattr( + self, + "bounds", + (0, float("+inf")), + ) if bounds is None else bounds self._value = [] if value is not None: self._value = self._value_sanitize(value) @@ -3938,7 +4315,7 @@ class SequenceOf(Obj): v = b"".join(self._encoded_values()) return 
def generic_decoder():  # pragma: no cover
    """Build a schema-less decoder together with its pretty printer

    :returns: two-element tuple: an instance of ``SEQUENCEOF`` (a
              ``SequenceOf`` subclass whose schema is the
              self-referencing choice built below) and the
              ``pprint_any(obj, oids=None, with_colours=False)``
              function, returning a string with one row per line
    """
    # Big hack with self references: the choice refers to containers,
    # that refer back to the very same choice as their schema
    universal = PrimitiveTypes()
    universal.specs["SequenceOf"] = SequenceOf(schema=universal)
    universal.specs["SetOf"] = SetOf(schema=universal)
    for tag_num in range(31):
        spec_name = "SequenceOf%d" % tag_num
        universal.specs[spec_name] = SequenceOf(
            schema=universal,
            expl=tag_ctxc(tag_num),
        )
    universal.specs["Any"] = Any()

    # Class is named after the type name on purpose, to omit it from
    # the output
    class SEQUENCEOF(SequenceOf):
        __slots__ = ()
        schema = universal

    def pprint_any(obj, oids=None, with_colours=False):
        def _rows(entries):
            for entry in entries:
                if not hasattr(entry, "_fields"):
                    # Not a namedtuple: nested iterable of entries
                    for nested_row in _rows(entry):
                        yield nested_row
                    continue
                if entry.asn1_type_name == Choice.asn1_type_name:
                    # Choice wrappers themselves are not printed
                    continue
                fields = entry._asdict()
                fields["decode_path"] = entry.decode_path[:-1] + (">",)
                rewritten = _pp(**fields)
                yield pp_console_row(
                    rewritten,
                    oids=oids,
                    with_offsets=True,
                    with_blob=False,
                    with_colours=with_colours,
                )
                for blob_row in pp_console_blob(rewritten):
                    yield blob_row
        return "\n".join(_rows(obj.pps()))

    return SEQUENCEOF(), pprint_any
definition to use", ) + parser.add_argument( + "--defines-by-path", + help="Python path to decoder's defines_by_path", + ) + parser.add_argument( + "--with-colours", + action='store_true', + help="Enable coloured output", + ) parser.add_argument( "DERFile", type=argparse.FileType("rb"), help="Path to DER file you want to decode", ) args = parser.parse_args() + args.DERFile.seek(args.skip) der = memoryview(args.DERFile.read()) args.DERFile.close() oids = obj_by_path(args.oids) if args.oids else {} @@ -4083,47 +4519,19 @@ def main(): # pragma: no cover from functools import partial pprinter = partial(pprint, big_blobs=True) else: - # All of this below is a big hack with self references - choice = PrimitiveTypes() - choice.specs["SequenceOf"] = SequenceOf(schema=choice) - choice.specs["SetOf"] = SetOf(schema=choice) - for i in range(31): - choice.specs["SequenceOf%d" % i] = SequenceOf( - schema=choice, - expl=tag_ctxc(i), - ) - choice.specs["Any"] = Any() - - # Class name equals to type name, to omit it from output - class SEQUENCEOF(SequenceOf): - __slots__ = () - schema = choice - schema = SEQUENCEOF() - - def pprint_any(obj, oids=None): - def _pprint_pps(pps): - for pp in pps: - if hasattr(pp, "_fields"): - if pp.asn1_type_name == Choice.asn1_type_name: - continue - pp_kwargs = pp._asdict() - pp_kwargs["decode_path"] = pp.decode_path[:-1] + (">",) - pp = _pp(**pp_kwargs) - yield pp_console_row( - pp, - oids=oids, - with_offsets=True, - with_blob=False, - ) - for row in pp_console_blob(pp): - yield row - else: - for row in _pprint_pps(pp): - yield row - return "\n".join(_pprint_pps(obj.pps())) - pprinter = pprint_any - obj, tail = schema().decode(der) - print(pprinter(obj, oids=oids)) + schema, pprinter = generic_decoder() + obj, tail = schema().decode( + der, + ctx=( + None if args.defines_by_path is None else + {"defines_by_path": obj_by_path(args.defines_by_path)} + ), + ) + print(pprinter( + obj, + oids=oids, + with_colours=True if args.with_colours else False, 
+ )) if tail != b"": print("\nTrailing data: %s" % hexenc(tail))