Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions deb/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Files bundled into the Lambda deployment archive. The handler module is
# s3apt.py (the Lambda handler is configured as s3apt.lambda_handler).
ZIPPED := s3apt.py gnupg.py debian/*

# None of the targets below produce a file of the same name.
.PHONY: all requires compress

all: requires compress

# Install the Python dependencies next to the handler so they can be zipped.
requires:
	pip install -t . -r requirements.txt

# Build the archive that is uploaded to the Lambda function.
compress:
	zip code.zip $(ZIPPED)
80 changes: 80 additions & 0 deletions deb/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@

# AWS Lambda APT repository manager for S3

Rewrite of [szinck/s3apt](https://github.com/szinck/s3apt) with a few changes and extra features: a Release file is generated and signed with the GPG key you provide.

## Setting up S3 and Lambda

Clone the repo and get all other required files
```
git clone https://github.com/tactycal/lambdaRepos.git
cd lambdaRepos/deb
pip install -t . -r requirements.txt
```

Compress all needed files
```
zip code.zip s3apt.py gnupg.py debian/*
```

Presuming you already have a GPG key generated, export the secret key
```
gpg --export-secret-key > secret.key
```

Create new lambda function, set handler to **s3apt.lambda_handler** and the triggers to:

* Object Created, suffix 'deb'
* Object Removed, suffix 'deb'
* If you are using certain directory as a repo, set it as prefix

Upload `code.zip` to lambda function

Set the environment variables

| Key | Value |
| --- | ---|
| PUBLIC | True/False |
| GPG_KEY | File |
| GPG_PASS | GPG key password |
| BUCKET_NAME | Bucket Name |
| CACHE_PREFIX | Directory |

**PUBLIC** Set to True for the outputs to be publicly readable

**GPG_KEY** Location of your GPG private key from root of the bucket (e.g. secret/private.key)

**GPG_PASS** Password of the private key uploaded to GPG_KEY (Note: environment variables can be encrypted using KMS keys)

**BUCKET_NAME** Name of the bucket. Should be the same as the one selected in triggers and the one you're using for repository

**CACHE_PREFIX** Path to the folder for the packages cache (e.g. deb/cache)


Create a folder in your S3 bucket with the same name as the CACHE_PREFIX variable

Upload the secret key file to the location you specified as GPG_KEY

Upload a .deb file to the desired folder; the lambda function should now keep your repository up to date

## Setting up apt

First time set up
```
echo "deb https://s3.$AWS_SERVER.amazonaws.com/$BUCKET_NAME/$PATH_TO_FOLDER_WITH_DEBIAN_FILES /" | sudo tee -a /etc/apt/sources.list
#an example of link "https://s3.eu-central-1.amazonaws.com/testbucket/repo"
#add public key to trusted sources - you have to export public key or use key server
sudo apt-key add <path to key>
sudo apt update
sudo apt install <packages>
```

Upgrading package
```
sudo apt update
sudo apt upgrade
```

## Notes

.deb, Release and Packages files are, and should be, publicly accessible for the previously mentioned method of setting up apt's sources list to work. If you don't want them to be, change PUBLIC in the environment variables to False and refer to szinck's guide [here](http://webscale.plumbing/managing-apt-repos-in-s3-using-lambda)
9 changes: 9 additions & 0 deletions deb/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
boto3==1.3.1
botocore==1.4.41
docutils==0.12
futures==3.0.5
jmespath==0.9.0
python-dateutil==2.5.3
python-debian==0.1.28
six==1.10.0
python-gnupg==0.4.1
278 changes: 278 additions & 0 deletions deb/s3apt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,278 @@
from __future__ import print_function
from time import gmtime, strftime
import urllib
import boto3
import botocore
import tempfile
import tarfile
import debian.arfile
import hashlib
import re
import sys
import os
import gnupg

def lambda_handler(event, context):
    """
    AWS Lambda entry point, invoked with an S3 notification event.

    Only the first record of the event is examined. When it refers to a
    ``.deb`` key in the bucket named by the ``BUCKET_NAME`` environment
    variable, the control data of a newly created package is cached and the
    ``Packages``, ``Release`` and ``Release.gpg`` files of the key's
    directory are regenerated. Any other event is ignored.

    event   -- notification payload (a dict holding a ``Records`` list)
    context -- Lambda context object; not used
    """
    print('Starting lambda function')
    #Get bucket and key info
    record = event['Records'][0]
    bucket = record['s3']['bucket']['name']
    key = urllib.unquote_plus(record['s3']['object']['key']).decode('utf8')

    if bucket != os.environ['BUCKET_NAME'] or not key.endswith(".deb"):
        return

    #Build packages file
    if record['eventName'].startswith('ObjectCreated'):
        s3 = boto3.resource('s3')
        deb_obj = s3.Object(bucket_name=bucket, key=key)
        print("S3 Notification of new key. Ensuring cached control data exists: %s" % (str(deb_obj)))
        get_cached_control_data(deb_obj)

    # Directory part of the key, including the trailing slash. A key at the
    # bucket root has no directory part, so its prefix must be empty rather
    # than "/": a "/" prefix would be used both to list packages and to name
    # the generated index files, and would not match a root-level key.
    directory, separator, _ = key.rpartition('/')
    prefix = directory + separator

    #Update packages file
    rebuild_package_index(prefix)

    #Build Release file
    build_release_file(prefix)

    #Sign Release file
    sign_release_file(prefix)


def get_cached_control_data(deb_obj):
    """
    Return the package record for ``deb_obj``, using the S3 cache.

    The cache entry lives in the ``BUCKET_NAME`` bucket under
    ``CACHE_PREFIX/<etag of the package>``. When that entry does not exist
    the record is built with ``read_control_data`` and stored in the cache
    before being returned. Any other S3 client error is re-raised.
    """
    s3 = boto3.resource('s3')
    package_etag = deb_obj.e_tag.strip('"')
    cache_entry = s3.Object(
        bucket_name=os.environ['BUCKET_NAME'],
        key=os.environ['CACHE_PREFIX'] + '/' + package_etag,
    )

    try:
        return cache_entry.get()['Body'].read()
    except botocore.exceptions.ClientError as e:
        # Only a missing cache entry is expected here.
        if e.response['Error']['Code'] != 'NoSuchKey':
            raise(e)

    # Cache miss: build the record and remember it for next time.
    fresh_record = read_control_data(deb_obj)
    cache_entry.put(Body=fresh_record)
    return fresh_record

def read_control_data(deb_obj):
    """
    Download a package from S3 and build its package record.

    The object body is streamed into a temporary file, which is then passed
    to ``get_control_data`` and ``format_package_record``. The temporary
    file is removed in every case, including when the download itself fails.

    deb_obj -- S3 object whose ``get()['Body']`` is the .deb content
    Returns the formatted package record.
    """
    fd, tmp = tempfile.mkstemp()
    try:
        with os.fdopen(fd, "wb") as fh:
            s3fh = deb_obj.get()['Body']
            size = 1024 * 1024
            while True:
                dat = s3fh.read(size)
                # Stop only at end of stream; a short read does not
                # necessarily mean that everything has been received.
                if not dat:
                    break
                fh.write(dat)

        ctrl = get_control_data(tmp)
        return format_package_record(ctrl, tmp)
    finally:
        os.remove(tmp)

def get_control_data(debfile):
    """
    Extract the control file text from the .deb archive at path ``debfile``.

    The ``control.tar.gz`` member of the ar archive is opened and its
    ``control`` (or ``./control``) entry is read. Simple fields whose value
    is blank are dropped; every other line is returned unchanged.
    """
    archive = debian.arfile.ArFile(debfile)
    control_tar = tarfile.open(fileobj=archive.getmember('control.tar.gz'), mode='r:gz')

    # control file can be named different things
    candidates = [member for member in control_tar.getmembers()
                  if member.name in ['control', './control']]
    raw_control = control_tar.extractfile(candidates[0]).read().strip()

    def keep(line):
        # A simple "Name: value" field is kept only when it has a value.
        if re.search(r"^\w[\w\d_-]+\s*:", line):
            return line.split(':', 1)[1].strip() != ""
        # Otherwise folded or multiline content: pass it through.
        return True

    # Strip out control fields with blank values while letting folded and
    # multiline fields pass through. See the debian policy manual:
    # https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-binarycontrolfiles
    return "\n".join(line for line in raw_control.strip().split("\n") if keep(line))

def format_package_record(ctrl, fname):
    """
    Append size and checksum fields to the control text ``ctrl``.

    ``fname`` is the path of the package file the fields are computed from.
    Returns the control lines followed by ``Size``, ``MD5sum``, ``SHA1`` and
    ``SHA256`` lines, joined with newlines.
    """
    record_lines = ctrl.strip().split("\n")

    size_in_bytes = os.stat(fname).st_size
    md5, sha1, sha256 = checksums(fname)

    record_lines.extend([
        "Size: %d" % (size_in_bytes),
        "MD5sum: %s" % (md5),
        "SHA1: %s" % (sha1),
        "SHA256: %s" % (sha256),
    ])
    return "\n".join(record_lines)

def checksums(fname):
    """
    Compute the MD5, SHA1 and SHA256 digests of the file at ``fname``.

    The file is read in 1 MiB chunks so large packages are never held in
    memory at once, and the handle is closed even when reading fails.

    Returns a ``(md5, sha1, sha256)`` tuple of hexadecimal digest strings.
    """
    digests = (hashlib.md5(), hashlib.sha1(), hashlib.sha256())
    size = 1024 * 1024

    with open(fname, "rb") as fh:
        while True:
            dat = fh.read(size)
            if not dat:
                # End of file reached.
                break
            for digest in digests:
                digest.update(dat)

    return tuple(digest.hexdigest() for digest in digests)

def rebuild_package_index(prefix):
    """
    Regenerate the ``Packages`` index for the repository under ``prefix``.

    Every ``.deb`` object whose key starts with ``prefix`` is listed. A hash
    over the packages' file names and ETags is compared with the hash stored
    in the metadata of the existing ``Packages`` object; when they match
    nothing is written. Otherwise the record of every package is collected
    (through the control data cache) and a new ``Packages`` object is
    uploaded, public or private according to the ``PUBLIC`` environment
    variable.

    Nothing is written when no package is found.
    NOTE(review): in that case an existing Packages object is left as it
    is - confirm this is intended when the last package is removed.
    """
    print("REBUILDING PACKAGE INDEX: %s" % (prefix))
    s3 = boto3.resource('s3')
    deb_objs = [
        obj
        for obj in s3.Bucket(os.environ['BUCKET_NAME']).objects.filter(Prefix=prefix)
        if obj.key.endswith(".deb")
    ]

    if not deb_objs:
        print("NOT BUILDING EMPTY PACKAGE INDEX")
        return

    # Fingerprint each package by file name AND ETag. Hashing the names only
    # would treat a package re-uploaded under the same name as unchanged and
    # leave its old size and checksums in the index.
    fingerprints = [
        "%s %s" % (obj.key.split('/')[-1], obj.e_tag.strip('"'))
        for obj in deb_objs
    ]

    # See if we need to rebuild the package index
    metadata_pkghash = get_package_index_hash(prefix)
    calcd_pkghash = calc_package_index_hash(fingerprints)
    print("calcd_pkghash=%s, metadata_pkghash=%s" % (calcd_pkghash, metadata_pkghash))
    if metadata_pkghash == calcd_pkghash:
        print("PACKAGE INDEX ALREADY UP TO DATE")
        return

    pkginfos = []
    for obj in deb_objs:
        print(obj.key)

        pkginfo = get_cached_control_data(obj)
        # Filename is written relative to the directory holding Packages.
        if obj.key.startswith(prefix):
            filename = obj.key[len(prefix):]
        else:
            filename = obj.key
        pkginfos.append(pkginfo + "\n%s\n" % ("Filename: %s" % filename))

    package_index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Packages")
    print("Writing package index: %s" % (str(package_index_obj)))
    acl = 'public-read' if os.environ['PUBLIC'] == 'True' else 'private'
    package_index_obj.put(Body="\n".join(sorted(pkginfos)), Metadata={'packages-hash': calcd_pkghash}, ACL=acl)

    print("DONE REBUILDING PACKAGE INDEX")

def calc_package_index_hash(deb_names):
    """
    Calculates a hash of all the given deb file names. This is deterministic so
    we can use it for short-circuiting.

    deb_names -- iterable of name strings; their order does not matter
    Returns the hexadecimal MD5 digest of the sorted names joined by newlines.
    """
    joined = "\n".join(sorted(deb_names))
    # hashlib works on bytes. Text is encoded explicitly as UTF-8 so that
    # non-ASCII names are hashed instead of raising; ASCII names produce the
    # same digest as hashing the joined string directly.
    if not isinstance(joined, bytes):
        joined = joined.encode("utf-8")

    md5 = hashlib.md5()
    md5.update(joined)
    return md5.hexdigest()

def get_package_index_hash(prefix):
    """
    Returns the hash recorded in the metadata of the existing Packages file.

    The value is read from the ``packages-hash`` metadata entry of the
    ``<prefix>Packages`` object. It can be used to detect if all the packages
    are represented without having to load a control data cache file for
    each package.

    Returns None when the entry is absent or when S3 answers with error code
    '404'; any other client error is re-raised.
    """
    s3 = boto3.resource('s3')
    try:
        print("looking for existing Packages file: %sPackages" % prefix)
        index_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + 'Packages')
        stored_hash = index_obj.metadata.get('packages-hash', None)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] != '404':
            raise(e)
        # No Packages file yet, hence no stored hash.
        stored_hash = None
    return stored_hash

def build_release_file(prefix):
    """
    Build the Release file for the repository under ``prefix`` and upload it.

    The ``Packages`` object is downloaded to ``/tmp/Packages``; its
    checksums and size are written, after a ``Date`` line, into ``MD5sum``,
    ``SHA1`` and ``SHA256`` sections. The result is stored as
    ``<prefix>Release``, public or private according to the ``PUBLIC``
    environment variable.
    """
    packages_path = '/tmp/Packages'
    client = boto3.client('s3')
    client.download_file(os.environ['BUCKET_NAME'], prefix + "Packages", packages_path)
    md5, sha1, sha256 = checksums(packages_path)

    date_line = 'Date: ' + strftime("%a, %d %b %Y %X UTC", gmtime())
    # The size is right-aligned in a 17 character column after each digest.
    size_column = str(os.stat(packages_path).st_size).rjust(17)

    release_file = date_line
    for header, digest in (('\nMD5sum:\n ', md5), ('\nSHA1:\n ', sha1), ('\nSHA256:\n ', sha256)):
        release_file += header + digest + size_column + ' Packages'

    resource = boto3.resource('s3')
    acl = 'public-read' if os.environ['PUBLIC'] == 'True' else 'private'

    release_index_obj = resource.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release")
    print("Writing Release file: %s" % (str(release_index_obj)))
    release_index_obj.put(Body=release_file, ACL=acl)

def sign_release_file(prefix):
    """
    Sign ``<prefix>Release`` and upload the signature as ``<prefix>Release.gpg``.

    The private key is downloaded from the bucket key named by the
    ``GPG_KEY`` environment variable and imported into a keyring under
    ``/tmp/gpgdocs``; the passphrase is taken from ``GPG_PASS``. The
    signature is stored public or private according to ``PUBLIC``.

    Raises RuntimeError when gpg produces no signature, so that an empty
    ``Release.gpg`` is never uploaded.
    """
    gpg = gnupg.GPG(gnupghome='/tmp/gpgdocs')
    s3 = boto3.client('s3')
    s3.download_file(os.environ['BUCKET_NAME'], os.environ['GPG_KEY'], '/tmp/gpgdocs/sec.key')
    s3.download_file(os.environ['BUCKET_NAME'], prefix + 'Release', '/tmp/gpgdocs/Release')

    # Context managers make sure both files are closed again.
    with open('/tmp/gpgdocs/sec.key', 'rb') as key_file:
        sec = gpg.import_keys(key_file.read())
    print("Key import returned: ")
    print(sec.results)

    with open('/tmp/gpgdocs/Release', 'rb') as stream:
        signed = gpg.sign_file(stream, passphrase=os.environ['GPG_PASS'], clearsign=True, detach=True, binary=False)
    print(signed)

    signature = str(signed)
    if not signature:
        # Nothing was produced (e.g. wrong passphrase or no usable key).
        raise RuntimeError("Signing the Release file failed: gpg returned no signature")

    acl = 'public-read' if os.environ['PUBLIC'] == 'True' else 'private'
    s3 = boto3.resource('s3')
    sign_obj = s3.Object(bucket_name=os.environ['BUCKET_NAME'], key=prefix + "Release.gpg")
    sign_obj.put(Body=signature, ACL=acl)
10 changes: 10 additions & 0 deletions rpm/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Files bundled into the Lambda deployment archive.
ZIPPED := s3rpm.py gnupg.py pyrpm/* pyrpm/tools/*

# None of the targets below produce a file of the same name.
.PHONY: set requires package

set: requires package

# Install (or upgrade) the Python dependencies next to the handler.
requires:
	pip3 install -t . -r requirements.txt --upgrade

# Build the archive that is uploaded to the Lambda function.
package:
	zip code.zip $(ZIPPED)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test target would be useful.

Loading