Rewrite 'generate_sslroots' w/o OpenSSL.

OpenSSL removed the ability to generate C code:
a18cf8fc63

This CL rewrites the generation script to use the pure-Python asn1crypto library.

The changes in the generated code lead to a huge diff in the generated file:
- Certificate array names are based on certificate fingerprints instead
of semi-human-readable names, which were not referenced externally;
- The order of arrays in the generated file matches the order in which the
certificates appear in the source PEM file. Previously, re-ordering happened
because temporary files were written to disk.


Bug: webrtc:11710
Change-Id: Ie7a97b3658f6ccb397f0fd0c21d341934a2cc12e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/304642
Commit-Queue: Yury Yarashevich <yura.yaroshevich@gmail.com>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#40039}
This commit is contained in:
Yury Yarashevich 2023-05-09 17:03:48 +02:00 committed by WebRTC LUCI CQ
parent 2d7424305d
commit c7ff896999
2 changed files with 213 additions and 191 deletions

View file

@ -86,3 +86,10 @@ wheel: <
name: "infra/python/wheels/requests-py2_py3" name: "infra/python/wheels/requests-py2_py3"
version: "version:2.13.0" version: "version:2.13.0"
> >
# Used by:
# tools_webrtc/sslroots
wheel: <
name: "infra/python/wheels/asn1crypto-py2_py3"
version: "version:1.0.1"
>

View file

@ -1,195 +1,238 @@
#!/usr/bin/env vpython3 #!/usr/bin/env vpython3
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
# Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. # Copyright (c) 2023 The WebRTC project authors. All Rights Reserved.
# #
# Use of this source code is governed by a BSD-style license # Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source # that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found # tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may # in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree. # be found in the AUTHORS file in the root of the source tree.
"""This is a tool to transform a crt file into a C/C++ header. import argparse
import logging
Usage: from pathlib import Path
python3 generate_sslroots.py certfile.pem [--verbose | -v] [--full_cert | -f] import tempfile
from typing import Tuple, Any, List, ByteString
Arguments: from datetime import datetime, timezone
-v Print output while running. from hashlib import sha256
-f Add public key and certificate name. Default is to skip and reduce from urllib.request import urlopen
generated file size. from asn1crypto import pem, x509
The supported cert files are:
- Google: https://pki.goog/roots.pem
- Mozilla: https://curl.se/docs/caextract.html
"""
import subprocess
from optparse import OptionParser
import os
import re
_GENERATED_FILE = 'ssl_roots.h' _GENERATED_FILE = 'ssl_roots.h'
# Prefix and extension of the temporary per-certificate files that _SplitCrt
# writes into the directory of the source bundle; _GenCFiles and _Cleanup
# recognise those files by the prefix.
_PREFIX = '__generated__'
_EXTENSION = '.crt'
# The *_ARRAY values are the array-name suffixes matched in the
# `openssl x509 -C` output and appended to each certificate label; the
# *_VARIABLE values are spliced into the `kSSLCert<Name>List` array names.
_SUBJECT_NAME_ARRAY = 'subject_name'
_SUBJECT_NAME_VARIABLE = 'SubjectName'
_PUBLIC_KEY_ARRAY = 'public_key'
_PUBLIC_KEY_VARIABLE = 'PublicKey'
_CERTIFICATE_ARRAY = 'certificate'
_CERTIFICATE_VARIABLE = 'Certificate'
_CERTIFICATE_SIZE_VARIABLE = 'CertificateSize'
# C element types of the generated list arrays: sizes use _INT_TYPE, the
# subject-name/public-key/certificate pointer lists use _CHAR_TYPE.
_INT_TYPE = 'size_t'
_CHAR_TYPE = 'unsigned char* const'
# NOTE(review): no use of _VERBOSE is visible in this view - confirm whether
# anything still reads it.
_VERBOSE = 'verbose'
# A source line starting with this text marks the bundle as the Mozilla one.
_MOZILLA_BUNDLE_CHECK = '## Certificate data from Mozilla as of:'
def main(): def main():
"""The main entrypoint.""" parser = argparse.ArgumentParser(
parser = OptionParser('usage %prog FILE') description='This is a tool to transform a crt file '
parser.add_option('-v', '--verbose', dest='verbose', action='store_true') f'into a C/C++ header: {_GENERATED_FILE}.')
parser.add_option('-f', '--full_cert', dest='full_cert', action='store_true')
options, args = parser.parse_args()
if len(args) < 1:
parser.error('No crt file specified.')
return
root_dir, bundle_type = _SplitCrt(args[0], options)
_GenCFiles(root_dir, options, bundle_type)
_Cleanup(root_dir)
parser.add_argument('source_path_or_url',
help='File path or URL to PEM storage file. '
'The supported cert files are: '
'- Google: https://pki.goog/roots.pem; '
'- Mozilla: https://curl.se/ca/cacert.pem')
parser.add_argument('-v',
'--verbose',
dest='verbose',
action='store_true',
help='Print output while running')
parser.add_argument('-f',
'--full_cert',
dest='full_cert',
action='store_true',
help='Add public key and certificate name. '
'Default is to skip and reduce generated file size.')
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.verbose else logging.WARNING)
def _SplitCrt(source_file, options): with tempfile.TemporaryDirectory() as temp_dir:
sub_file_blocks = [] cert_file = Path(temp_dir) / "cacert.pem"
label_name = ''
prev_line = None if args.source_path_or_url.startswith(
root_dir = os.path.dirname(os.path.abspath(source_file)) + '/' 'https://') or args.source_path_or_url.startswith('http://'):
_PrintOutput(root_dir, options) _DownloadCertificatesStore(args.source_path_or_url, cert_file)
lines = None destination_dir = Path.cwd()
with open(source_file) as f:
lines = f.readlines()
mozilla_bundle = any(l.startswith(_MOZILLA_BUNDLE_CHECK) for l in lines)
for line in lines:
if line.startswith('#'):
if mozilla_bundle:
continue
if line.startswith('# Label: '):
sub_file_blocks.append(line)
label = re.search(r'\".*\"', line)
temp_label = label.group(0)
end = len(temp_label) - 1
label_name = _SafeName(temp_label[1:end])
if mozilla_bundle and line.startswith('==='):
sub_file_blocks.append(line)
label_name = _SafeName(prev_line)
elif line.startswith('-----END CERTIFICATE-----'):
sub_file_blocks.append(line)
new_file_name = root_dir + _PREFIX + label_name + _EXTENSION
_PrintOutput('Generating: ' + new_file_name, options)
new_file = open(new_file_name, 'w')
for out_line in sub_file_blocks:
new_file.write(out_line)
new_file.close()
sub_file_blocks = []
else: else:
sub_file_blocks.append(line) source_path = Path(args.source_path_or_url)
prev_line = line cert_file.write_bytes(source_path.read_bytes())
return root_dir, 'Mozilla' if mozilla_bundle else 'Google' destination_dir = source_path.parent
logging.debug('Stored certificate from %s into %s', args.source_path_or_url,
cert_file)
header_file = destination_dir / _GENERATED_FILE
digest, certificates = _LoadCertificatesStore(cert_file)
_GenerateCHeader(header_file, args.source_path_or_url, digest, certificates,
args.full_cert)
logging.debug('Did generate %s from %s [%s]', header_file,
args.source_path_or_url, digest)
def _GenCFiles(root_dir, options, bundle_type): def _DownloadCertificatesStore(pem_url: str, destination_file: Path):
output_header_file = open(root_dir + _GENERATED_FILE, 'w') with urlopen(pem_url) as response:
output_header_file.write(_CreateOutputHeader(bundle_type)) pem_file = response.read()
if options.full_cert: logging.info('Got response with status [%d]: %s', response.status, pem_url)
subject_name_list = _CreateArraySectionHeader(_SUBJECT_NAME_VARIABLE,
_CHAR_TYPE, options)
public_key_list = _CreateArraySectionHeader(_PUBLIC_KEY_VARIABLE,
_CHAR_TYPE, options)
certificate_list = _CreateArraySectionHeader(_CERTIFICATE_VARIABLE,
_CHAR_TYPE, options)
certificate_size_list = _CreateArraySectionHeader(_CERTIFICATE_SIZE_VARIABLE,
_INT_TYPE, options)
for _, _, files in os.walk(root_dir): if destination_file.parent.exists():
for current_file in files: logging.debug('Creating directory and it\'s parents %s',
if current_file.startswith(_PREFIX): destination_file.parent)
prefix_length = len(_PREFIX) destination_file.parent.mkdir(parents=True, exist_ok=True)
length = len(current_file) - len(_EXTENSION) if destination_file.exists():
label = current_file[prefix_length:length] logging.debug('Unlink existing file %s', destination_file)
filtered_output, cert_size = _CreateCertSection(root_dir, current_file, destination_file.unlink(missing_ok=True)
label, options)
output_header_file.write(filtered_output + '\n\n\n')
if options.full_cert:
subject_name_list += _AddLabelToArray(label, _SUBJECT_NAME_ARRAY)
public_key_list += _AddLabelToArray(label, _PUBLIC_KEY_ARRAY)
certificate_list += _AddLabelToArray(label, _CERTIFICATE_ARRAY)
certificate_size_list += (' %s,\n') % (cert_size)
if options.full_cert: destination_file.write_bytes(pem_file)
subject_name_list += _CreateArraySectionFooter() logging.info('Stored downloaded %d bytes pem file to `%s`', len(pem_file),
output_header_file.write(subject_name_list) destination_file)
public_key_list += _CreateArraySectionFooter()
output_header_file.write(public_key_list)
certificate_list += _CreateArraySectionFooter()
output_header_file.write(certificate_list)
certificate_size_list += _CreateArraySectionFooter()
output_header_file.write(certificate_size_list)
output_header_file.write(_CreateOutputFooter())
output_header_file.close()
def _Cleanup(root_dir): def _LoadCertificatesStore(
for f in os.listdir(root_dir): source_file: Path) -> Tuple[str, List[x509.Certificate]]:
if f.startswith(_PREFIX): pem_bytes = source_file.read_bytes()
os.remove(root_dir + f)
certificates = [
x509.Certificate.load(der)
for type, _, der in pem.unarmor(pem_bytes, True) if type == 'CERTIFICATE'
]
digest = f'sha256:{sha256(pem_bytes).hexdigest()}'
logging.debug('Loaded %d certificates from %s [%s] ', len(certificates),
source_file, digest)
return digest, certificates
def _CreateCertSection(root_dir, source_file, label, options): def _GenerateCHeader(header_file: Path, source: str, source_digest: str,
command = 'openssl x509 -in %s%s -noout -C' % (root_dir, source_file) certificates: List[x509.Certificate], full_cert: bool):
_PrintOutput(command, options) header_file.parent.mkdir(parents=True, exist_ok=True)
output = subprocess.getstatusoutput(command)[1] with header_file.open('w') as output:
decl_block = 'unsigned char .*_(%s|%s|%s)' %\ output.write(_CreateOutputHeader(source, source_digest))
(_SUBJECT_NAME_ARRAY, _PUBLIC_KEY_ARRAY, _CERTIFICATE_ARRAY)
prog = re.compile(decl_block, re.IGNORECASE)
renamed_output = prog.sub('const unsigned char ' + label + r'_\1', output)
filtered_output = '' named_certificates = [(cert,
cert_block = '^const unsigned char.*?};$' f'kCertificateWithFingerprint_{cert.sha256.hex()}')
prog2 = re.compile(cert_block, re.IGNORECASE | re.MULTILINE | re.DOTALL) for cert in certificates]
if not options.full_cert:
filtered_output = prog2.sub('', renamed_output, count=2)
else:
filtered_output = renamed_output
cert_size_block = r'\d\d\d+' names = list(map(lambda x: x[1], named_certificates))
prog3 = re.compile(cert_size_block, re.MULTILINE | re.VERBOSE) unique_names = list(set(names))
result = prog3.findall(renamed_output) if len(names) != len(unique_names):
cert_size = result[len(result) - 1] raise RuntimeError(
f'There are {len(names) - len(unique_names)} non-unique '
'certificate names generated. Generator script must be '
'fixed to handle collision.')
return filtered_output, cert_size for cert, name in named_certificates:
output.write(_CreateCertificateMetadataHeader(cert))
if full_cert:
output.write(
_CArrayConstantDefinition('unsigned char',
f'{name}_subject_name',
_CreateHexList(cert.subject.dump()),
max_items_per_line=16))
output.write('\n')
output.write(
_CArrayConstantDefinition('unsigned char',
f'{name}_public_key',
_CreateHexList(cert.public_key.dump()),
max_items_per_line=16))
output.write('\n')
output.write(
_CArrayConstantDefinition('unsigned char',
f'{name}_certificate',
_CreateHexList(cert.dump()),
max_items_per_line=16))
output.write('\n\n')
if full_cert:
output.write(
_CArrayConstantDefinition('unsigned char* const',
'kSSLCertSubjectNameList',
[f'{name}_subject_name' for name in names]))
output.write('\n\n')
output.write(
_CArrayConstantDefinition('unsigned char* const',
'kSSLCertPublicKeyList',
[f'{name}_public_key' for name in names]))
output.write('\n\n')
output.write(
_CArrayConstantDefinition('unsigned char* const',
'kSSLCertCertificateList',
[f'{name}_certificate' for name in names]))
output.write('\n\n')
output.write(
_CArrayConstantDefinition(
'size_t', 'kSSLCertCertificateSizeList',
[f'{len(cert.dump())}' for cert, _ in named_certificates]))
output.write('\n\n')
output.write(_CreateOutputFooter())
def _CreateOutputHeader(bundle_type): def _CreateHexList(items: ByteString) -> List[str]:
output = ('/*\n' """
' * Copyright 2004 The WebRTC Project Authors. All rights ' Produces list of strings each item is hex literal of byte of source sequence
'reserved.\n' """
' *\n' return [f'0x{item:02X}' for item in items]
' * Use of this source code is governed by a BSD-style license\n'
' * that can be found in the LICENSE file in the root of the '
'source\n' def _CArrayConstantDefinition(type_name: str,
' * tree. An additional intellectual property rights grant can be ' array_name: str,
'found\n' items: List[Any],
' * in the file PATENTS. All contributing project authors may\n' max_items_per_line: int = 1) -> str:
' * be found in the AUTHORS file in the root of the source tree.\n' """
' */\n\n' Produces C array definition like: `const type_name array_name = { items };`
'#ifndef RTC_BASE_SSL_ROOTS_H_\n' """
'#define RTC_BASE_SSL_ROOTS_H_\n\n' return (f'const {type_name} {array_name}[{len(items)}]='
'// This file is the root certificates in C form.\n\n' f'{_CArrayInitializerList(items, max_items_per_line)};')
'// It was generated with the following script:\n'
'// tools_webrtc/sslroots/generate_sslroots.py'
' %s_CA_bundle.pem\n\n' def _CArrayInitializerList(items: List[Any],
'// clang-format off\n' max_items_per_line: int = 1) -> str:
'// Don\'t bother formatting generated code,\n' """
'// also it would breaks subject/issuer lines.\n\n' % bundle_type) Produces C initializer list like: `{\\nitems[0], \\n ...}`
"""
return '{\n' + '\n'.join([
','.join(items[i:i + max_items_per_line]) + ','
for i in range(0, len(items), max_items_per_line)
]) + '\n}'
def _CreateCertificateMetadataHeader(cert: x509.Certificate) -> str:
  """Returns the C comment lines that describe a single certificate.

  The result is three newline-terminated block comments: the subject and the
  issuer in their human-friendly form, and a crt.sh lookup link built from
  the hex form of `cert.sha256`.
  """
  subject_comment = f'/* subject: {cert.subject.human_friendly} */\n'
  issuer_comment = f'/* issuer: {cert.issuer.human_friendly} */\n'
  link_comment = f'/* link: https://crt.sh/?q={cert.sha256.hex()} */\n'
  return subject_comment + issuer_comment + link_comment
def _CreateOutputHeader(source_path_or_url: str, source_digest: str) -> str:
now_utc = datetime.now(timezone.utc).replace(microsecond=0)
output = (
'/*\n'
f' * Copyright {now_utc.year} The WebRTC Project Authors. All rights '
'reserved.\n'
' *\n'
' * Use of this source code is governed by a BSD-style license\n'
' * that can be found in the LICENSE file in the root of the '
'source\n'
' * tree. An additional intellectual property rights grant can be '
'found\n'
' * in the file PATENTS. All contributing project authors may\n'
' * be found in the AUTHORS file in the root of the source tree.\n'
' */\n\n'
'#ifndef RTC_BASE_SSL_ROOTS_H_\n'
'#define RTC_BASE_SSL_ROOTS_H_\n\n'
'// This file is the root certificates in C form.\n\n'
f'// It was generated at {now_utc.isoformat()} by the following script:\n'
'// `tools_webrtc/sslroots/generate_sslroots.py '
f'{source_path_or_url}`\n\n'
'// clang-format off\n'
'// Don\'t bother formatting generated code,\n'
'// also it would breaks subject/issuer lines.\n\n'
f'// Source bundle `{source_path_or_url}` digest is [{source_digest}]\n\n'
)
return output return output
@ -197,33 +240,5 @@ def _CreateOutputFooter():
return '// clang-format on\n\n#endif // RTC_BASE_SSL_ROOTS_H_\n' return '// clang-format on\n\n#endif // RTC_BASE_SSL_ROOTS_H_\n'
def _CreateArraySectionHeader(type_name, type_type, options):
  """Returns the opening line of a `kSSLCert<type_name>List` C array.

  `type_type` is the C element type placed after `const`. The line is also
  handed to _PrintOutput, which echoes it when `options.verbose` is set.
  """
  declaration = 'const {} kSSLCert{}List[] = {{\n'.format(type_type, type_name)
  _PrintOutput(declaration, options)
  return declaration
def _AddLabelToArray(label, type_name):
return ' %s_%s,\n' % (label, type_name)
def _CreateArraySectionFooter():
return '};\n\n'
def _SafeName(original_file_name):
bad_chars = ' -./\\()áéíőú\r\n'
replacement_chars = ''
for _ in bad_chars:
replacement_chars += '_'
translation_table = str.maketrans(bad_chars, replacement_chars)
return original_file_name.translate(translation_table)
def _PrintOutput(output, options):
if options.verbose:
print(output)
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
  main()