Source code for backend.application

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from os import path
import logging
from datetime import datetime, timedelta
from peewee import fn
import peewee
import smtplib
import socket
import tornado.web
import tornado
import random
import string
import uuid
import math
import re

import db
import handlers
import settings
from modules.browser import utils


def build_dataset_structure(dataset_version, user=None, dataset=None):
    """
    Build the dictionary describing a dataset together with one version.

    The dataset row is converted with ``db.build_dict_from_row`` and extended
    with a ``version`` sub-dictionary (whose ``available_from`` is rendered
    as ``YYYY-MM-DD``) and a ``has_image`` flag. When ``user`` is truthy the
    result also carries ``is_admin`` and ``authorization_level`` (one of
    ``has_access``, ``has_requested_access`` or ``no_access``).

    If ``dataset`` is not supplied it is taken from ``dataset_version.dataset``.
    """
    if dataset is None:
        dataset = dataset_version.dataset

    structure = db.build_dict_from_row(dataset)

    version_info = db.build_dict_from_row(dataset_version)
    version_info['available_from'] = version_info['available_from'].strftime('%Y-%m-%d')
    structure['version'] = version_info

    structure['has_image'] = dataset.has_image()

    # Anonymous visitors get no authorization information at all.
    if not user:
        return structure

    structure['is_admin'] = user.is_admin(dataset)
    if user.has_access(dataset, dataset_version.version):
        level = 'has_access'
    elif user.has_requested_access(dataset):
        level = 'has_requested_access'
    else:
        level = 'no_access'
    structure['authorization_level'] = level

    return structure
class QuitHandler(handlers.UnsafeHandler):
    """Handler whose GET request stops the Tornado IOLoop instance."""

    def get(self):
        """Fetch the IOLoop instance and stop it."""
        tornado.ioloop.IOLoop.instance().stop()
class GetSchema(handlers.UnsafeHandler):
    """
    Returns the schema.org, and bioschemas.org, annotation for a given url.

    This handler behaves quite differently from the rest of the application,
    as the structured data testing tool had trouble catching the schema inject
    when it went through AngularJS. The solution for now has been to make this
    very general function that "re-parses" the 'url' request parameter to
    figure out what information to return.
    """

    def get(self):
        """
        Reply with the structured-data annotation matching the 'url' argument.

        Without a 'url' argument, or when it names no dataset, the generic
        DataCatalog description is returned. A dataset url adds a "dataset"
        entry, and a beacon url replaces the reply with a Beacon description.
        Responds 404 when the dataset version lookup yields None, and 403
        when the version is not yet available and the current user is not an
        admin of the dataset.
        """
        dataset = None
        version = None
        beacon = None
        try:
            url = self.get_argument('url')
        except tornado.web.MissingArgumentError:
            # No url given: fall through and answer with the catalog only.
            pass
        else:
            match = re.match(".*/dataset/([^/]+)(/version/([^/]+))?", url)
            if match:
                dataset = match.group(1)
                version = match.group(3)
            beacon = re.match(".*/dataset/.*/beacon", url)

        provider = {
            "@type": "Organization",
            "name": "National Bioinformatics Infrastructure Sweden",
            "alternateName": ["NBIS", "ELIXIR Sweden"],
            "logo": "http://nbis.se/assets/img/logos/nbislogo-green.svg",
            "url": "https://nbis.se/",
        }
        software_license = {
            "@type": "CreativeWork",
            "name": "GNU General Public License v3.0",
            "url": "https://www.gnu.org/licenses/gpl-3.0.en.html",
        }
        base = {
            "@context": "http://schema.org/",
            "@type": "DataCatalog",
            "name": "SweFreq",
            "alternateName": ["The Swedish Frequency resource for genomics"],
            "description": "The Swedish Frequency resource for genomics (SweFreq) is a website developed to make genomic datasets more findable and accessible in order to promote collaboration, new research and increase public benefit.",
            "url": "https://swefreq.nbis.se/",
            "provider": provider,
            "datePublished": "2016-12-23",
            "dateModified": "2017-02-01",
            "license": software_license,
        }

        if dataset:
            dataset_schema = {'@type': "Dataset"}

            try:
                dataset_version = db.get_dataset_version(dataset, version)
                if dataset_version is None:
                    self.send_error(status_code=404)
                    return

                if dataset_version.available_from > datetime.now():
                    # If it's not available yet, only return if user is admin.
                    viewer = self.current_user
                    if not viewer or not viewer.is_admin(dataset_version.dataset):
                        self.send_error(status_code=403)
                        return

                base_url = "%s://%s" % (self.request.protocol, self.request.host)
                short_name = dataset_version.dataset.short_name
                dataset_url = base_url + "/dataset/" + short_name

                dataset_schema.update({
                    'url': dataset_url,
                    '@id': dataset_url,
                    'name': short_name,
                    'description': dataset_version.description,
                    'identifier': short_name,
                    'citation': dataset_version.ref_doi,
                })

                base["dataset"] = dataset_schema

            except db.DatasetVersion.DoesNotExist as e:
                logging.error("Dataset version does not exist: {}".format(e))
            except db.DatasetVersionCurrent.DoesNotExist as e:
                logging.error("Dataset does not exist: {}".format(e))

            if beacon:
                # A beacon url swaps the catalog for a Beacon description
                # that embeds the dataset entry built above.
                base = {
                    "@context": "http://schema.org",
                    "@id": "https://swefreq.nbis.se/api/beacon-elixir/",  # or maybe "se.nbis.swefreq" as in the beacon api?
                    "@type": "Beacon",
                    "dataset": [dataset_schema],
                    "dct:conformsTo": "https://bioschemas.org/specifications/drafts/Beacon/",
                    "name": "Swefreq Beacon",
                    "provider": base["provider"],
                    "supportedRefs": ["GRCh37"],
                    "description": "Beacon API Web Server based on the GA4GH Beacon API",
                    "version": "1.1.0",  # beacon api version
                    "aggregator": False,
                    "url": "https://swefreq.nbis.se/api/beacon-elixir/",
                }

        self.finish(base)
class ListDatasets(handlers.UnsafeHandler):
    """Lists the datasets visible to the current user."""

    def get(self):
        """
        Reply with ``{'data': [...]}`` describing dataset versions.

        For a logged-in user, versions that become available in the future
        and belong to datasets the user administers come first, each flagged
        ``future``. Every row of ``db.DatasetVersionCurrent`` follows, flagged
        ``current``.
        """
        user = self.current_user

        def tagged(dataset_version, flag):
            # Describe one version and mark it with the given boolean flag.
            entry = build_dataset_structure(dataset_version, user)
            entry[flag] = True
            return entry

        listing = []
        if user:
            upcoming = (db.DatasetVersion.select()
                          .join(db.Dataset)
                          .join(db.DatasetAccess)
                          .where(db.DatasetVersion.available_from > datetime.now(),
                                 db.DatasetAccess.user == user,
                                 db.DatasetAccess.is_admin))
            for future_version in upcoming:
                listing.append(tagged(future_version, 'future'))

        for current_version in db.DatasetVersionCurrent.select():
            listing.append(tagged(current_version, 'current'))

        self.finish({'data': listing})
class GetDataset(handlers.UnsafeHandler):
    """Returns the description of a single dataset version."""

    def get(self, dataset, version=None):
        """
        Reply with the structure for ``dataset`` at ``version``.

        The reply is the result of ``build_dataset_structure`` extended with
        ``version.var_call_ref`` (the reference build of the version's
        reference set) and a ``future`` flag that is True when the version's
        ``available_from`` lies after the current time. Responds 404 when the
        version lookup yields None.
        """
        dataset, version = utils.parse_dataset(dataset, version)
        user = self.current_user

        dataset_version = db.get_dataset_version(dataset, version)
        if dataset_version is None:
            self.send_error(status_code=404)
            return

        is_future = False
        if dataset_version.available_from > datetime.now():
            is_future = True

        reply = build_dataset_structure(dataset_version, user)
        reply['version']['var_call_ref'] = dataset_version.reference_set.reference_build
        reply['future'] = is_future

        self.finish(reply)
class ListDatasetVersions(handlers.UnsafeHandler):
    """Lists the versions of one dataset."""

    def get(self, dataset):
        """
        Reply with ``{'data': [...]}``, one entry per visible version.

        Entries are ordered by ascending version and hold ``name``,
        ``available_from`` (``YYYY-MM-DD``), ``current`` and ``future``.
        Versions that are not yet available are only included for admins of
        the dataset. The ``current`` entry is the highest version whose
        ``available_from`` has already passed.
        """
        dataset, _ = utils.parse_dataset(dataset)
        user = self.current_user
        dataset = db.get_dataset(dataset)

        rows = (db.DatasetVersion.select(db.DatasetVersion.version,
                                         db.DatasetVersion.available_from)
                  .where(db.DatasetVersion.dataset == dataset))
        logging.info("ListDatasetVersions")

        ordered = sorted(rows, key=lambda row: row.version)

        # Walk from the highest version down so that the first version that
        # is already available can be flagged as the current one.
        newest_first = []
        current_seen = False
        for row in reversed(ordered):
            is_future = row.available_from > datetime.now()

            # Skip future versions unless admin
            if is_future and not (user and user.is_admin(dataset)):
                continue

            # Figure out if this is the current version
            is_current = not current_seen and row.available_from < datetime.now()
            if is_current:
                current_seen = True

            newest_first.append({
                'name': row.version,
                'available_from': row.available_from.strftime('%Y-%m-%d'),
                'current': is_current,
                'future': is_future,
            })

        # Present the result in ascending version order.
        newest_first.reverse()
        self.finish({'data': newest_first})
class DatasetFiles(handlers.AuthorizedHandler):
    """Lists the files that belong to a dataset version."""

    def get(self, dataset, ds_version=None):
        """
        Reply with ``{'files': [...]}`` for the requested dataset version.

        Each file row is converted with ``db.build_dict_from_row`` and gains
        ``dirname`` (directory part of its ``uri``) and ``human_size``
        (``file_size`` rendered by ``format_bytes``). Responds 404 when the
        version lookup yields None.
        """
        dataset, ds_version = utils.parse_dataset(dataset, ds_version)
        dataset_version = db.get_dataset_version(dataset, ds_version)
        if dataset_version is None:
            self.send_error(status_code=404)
            return

        def describe(file_row):
            # Turn one file row into its dictionary representation.
            info = db.build_dict_from_row(file_row)
            info['dirname'] = path.dirname(info['uri'])
            info['human_size'] = format_bytes(info['file_size'])
            return info

        self.finish({'files': [describe(f) for f in dataset_version.files]})
def format_bytes(nbytes):
    """
    Render a byte count as a human-readable string such as ``"1.5 Kb"``.

    Units are decimal (steps of 1000) and the value is rounded to two
    decimals. The unit is found by integer comparison rather than a
    floating-point logarithm, so zero and values below one no longer fail
    or pick a wrong unit, and exact powers of 1000 cannot be misclassified
    by rounding error. Values beyond the largest known unit are expressed
    in that unit instead of raising ``IndexError``.

    Args:
        nbytes: size in bytes (int or float).

    Returns:
        The formatted string ``"<value> <unit>"``.
    """
    postfixes = ['b', 'Kb', 'Mb', 'Gb', 'Tb', 'Pb', 'Eb']

    # Use the magnitude so a negative number keeps its sign but still gets
    # a sensible unit.
    magnitude = abs(nbytes)
    exponent = 0
    largest = len(postfixes) - 1
    while exponent < largest and magnitude >= 1000 ** (exponent + 1):
        exponent += 1

    return "{} {}".format(round(nbytes / 1000 ** exponent, 2), postfixes[exponent])
class Collection(handlers.UnsafeHandler):
    """Returns the sample collections and study information of a dataset."""

    def get(self, dataset, ds_version=None):
        """
        Reply with the dataset's sample sets grouped by collection name.

        The reply holds ``collections`` (collection name mapped to its
        ``sample_sets`` and ``ethnicity``) and ``study`` (the study row as a
        dict, with ``publication_date`` rendered as ``YYYY-MM-DD``).
        ``ds_version`` is accepted but not used.
        """
        dataset, _ = utils.parse_dataset(dataset)
        dataset = db.get_dataset(dataset)

        grouped = {}
        for sample_set in dataset.sample_sets:
            collection = sample_set.collection
            name = collection.name
            if name not in grouped:
                grouped[name] = {
                    'sample_sets': [],
                    'ethnicity': collection.ethnicity,
                }
            grouped[name]['sample_sets'].append(db.build_dict_from_row(sample_set))

        study = db.build_dict_from_row(dataset.study)
        reply = {
            'collections': grouped,
            'study': study,
        }
        study['publication_date'] = study['publication_date'].strftime('%Y-%m-%d')

        self.finish(reply)
class GetUser(handlers.UnsafeHandler):
    """Returns information about the current user."""

    def get(self):
        """
        Reply with the current user's name, email, affiliation and country.

        When nobody is logged in the reply is a placeholder with ``user`` and
        ``email`` set to None and ``login_type`` set to ``'none'``.
        """
        user = self.current_user

        if user:
            reply = {
                'user': user.name,
                'email': user.email,
                'affiliation': user.affiliation,
                'country': user.country,
            }
        else:
            reply = {'user': None, 'email': None, 'login_type': 'none'}

        self.finish(reply)
class CountryList(handlers.UnsafeHandler):
    """Serves the fixed list of country names."""

    # Country names in the order in which they are served.
    _COUNTRY_NAMES = (
        "Afghanistan", "Albania", "Algeria", "American Samoa", "Andorra",
        "Angola", "Anguilla", "Antarctica", "Antigua and Barbuda",
        "Argentina", "Armenia", "Aruba", "Australia", "Austria",
        "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados",
        "Belarus", "Belgium", "Belize", "Benin", "Bermuda", "Bhutan",
        "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil",
        "British Indian Ocean Territory", "British Virgin Islands", "Brunei",
        "Bulgaria", "Burkina Faso", "Burundi", "Cambodia", "Cameroon",
        "Canada", "Cape Verde", "Cayman Islands", "Central African Republic",
        "Chad", "Chile", "China", "Christmas Island", "Cocos Islands",
        "Colombia", "Comoros", "Cook Islands", "Costa Rica", "Croatia",
        "Cuba", "Curacao", "Cyprus", "Czech Republic",
        "Democratic Republic of the Congo", "Denmark", "Djibouti",
        "Dominica", "Dominican Republic", "East Timor", "Ecuador", "Egypt",
        "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Ethiopia",
        "Falkland Islands", "Faroe Islands", "Fiji", "Finland", "France",
        "French Polynesia", "Gabon", "Gambia", "Georgia", "Germany", "Ghana",
        "Gibraltar", "Greece", "Greenland", "Grenada", "Guam", "Guatemala",
        "Guernsey", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras",
        "Hong Kong", "Hungary", "Iceland", "India", "Indonesia", "Iran",
        "Iraq", "Ireland", "Isle of Man", "Israel", "Italy", "Ivory Coast",
        "Jamaica", "Japan", "Jersey", "Jordan", "Kazakhstan", "Kenya",
        "Kiribati", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia",
        "Lebanon", "Lesotho", "Liberia", "Libya", "Liechtenstein",
        "Lithuania", "Luxembourg", "Macau", "Macedonia", "Madagascar",
        "Malawi", "Malaysia", "Maldives", "Mali", "Malta",
        "Marshall Islands", "Mauritania", "Mauritius", "Mayotte", "Mexico",
        "Micronesia", "Moldova", "Monaco", "Mongolia", "Montenegro",
        "Montserrat", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru",
        "Nepal", "Netherlands", "Netherlands Antilles", "New Caledonia",
        "New Zealand", "Nicaragua", "Niger", "Nigeria", "Niue", "North Korea",
        "Northern Mariana Islands", "Norway", "Oman", "Pakistan", "Palau",
        "Palestine", "Panama", "Papua New Guinea", "Paraguay", "Peru",
        "Philippines", "Pitcairn", "Poland", "Portugal", "Puerto Rico",
        "Qatar", "Republic of the Congo", "Reunion", "Romania", "Russia",
        "Rwanda", "Saint Barthelemy", "Saint Helena", "Saint Kitts and Nevis",
        "Saint Lucia", "Saint Martin", "Saint Pierre and Miquelon",
        "Saint Vincent and the Grenadines", "Samoa", "San Marino",
        "Sao Tome and Principe", "Saudi Arabia", "Senegal", "Serbia",
        "Seychelles", "Sierra Leone", "Singapore", "Sint Maarten", "Slovakia",
        "Slovenia", "Solomon Islands", "Somalia", "South Africa",
        "South Korea", "South Sudan", "Spain", "Sri Lanka", "Sudan",
        "Suriname", "Svalbard and Jan Mayen", "Swaziland", "Sweden",
        "Switzerland", "Syria", "Taiwan", "Tajikistan", "Tanzania",
        "Thailand", "Togo", "Tokelau", "Tonga", "Trinidad and Tobago",
        "Tunisia", "Turkey", "Turkmenistan", "Turks and Caicos Islands",
        "Tuvalu", "U.S. Virgin Islands", "Uganda", "Ukraine",
        "United Arab Emirates", "United Kingdom", "United States", "Uruguay",
        "Uzbekistan", "Vanuatu", "Vatican", "Venezuela", "Vietnam",
        "Wallis and Futuna", "Western Sahara", "Yemen", "Zambia", "Zimbabwe",
    )

    def get(self):
        """Write ``{'countries': [{'name': ...}, ...]}`` to the response."""
        entries = [{'name': country} for country in self.country_list]
        self.write({'countries': entries})

    @property
    def country_list(self):
        """Return a new list holding all country names."""
        return list(self._COUNTRY_NAMES)
class RequestAccess(handlers.SafeHandler):
    """Registers an access request for a dataset from the current user."""

    def post(self, dataset):
        """
        Store the user's affiliation and country and record the request.

        Reads the ``affiliation``, ``country`` and ``newsletter`` arguments
        unstripped. Inside one transaction the user is saved, the
        ``DatasetAccess`` row is fetched or created and updated with the
        newsletter preference, and an ``access_requested`` entry is added to
        the access log. A ``peewee.OperationalError`` is logged and not
        re-raised.
        """
        dataset, _ = utils.parse_dataset(dataset)
        user = self.current_user
        dataset = db.get_dataset(dataset)

        user.affiliation = self.get_argument("affiliation", strip=False)
        user.country = self.get_argument("country", strip=False)
        wants_newsletter = self.get_argument("newsletter", strip=False)

        logging.info("Inserting into database: {}, {}".format(user.name, user.email))

        try:
            with db.database.atomic():
                user.save()  # Save to database
                access_row, _created = db.DatasetAccess.get_or_create(
                    user=user,
                    dataset=dataset,
                )
                access_row.wants_newsletter = wants_newsletter
                access_row.save()
                db.UserAccessLog.create(
                    user=user,
                    dataset=dataset,
                    action='access_requested',
                )
        except peewee.OperationalError as error:
            # NOTE(review): the failure is only logged; presumably the client
            # still gets a normal response - confirm this is intended.
            logging.error("Database Error: {}".format(error))
class LogEvent(handlers.SafeHandler):
    """Records user events for a dataset."""

    def post(self, dataset, event, target):
        """
        Log ``event`` for the current user.

        Only the ``consent`` event is supported: the user is saved and a
        ``UserConsentLog`` row is created for the dataset version whose
        version equals ``target``. Any other event raises
        ``tornado.web.HTTPError`` with status 400.
        """
        dataset, _ = utils.parse_dataset(dataset)
        user = self.current_user

        if event != 'consent':
            raise tornado.web.HTTPError(400, reason="Can't log that")

        user.save()
        consented_version = (db.DatasetVersion
                               .select()
                               .join(db.Dataset)
                               .where(db.DatasetVersion.version == target,
                                      db.Dataset.short_name == dataset)
                               .get())
        db.UserConsentLog.create(
            user=user,
            dataset_version=consented_version,
        )
class ApproveUser(handlers.AdminHandler):
    """Grants a user access to a dataset and notifies them by e-mail."""

    def post(self, dataset, email):
        """
        Approve the access request of the user with address ``email``.

        Inside one transaction the user's ``DatasetAccess`` row gets
        ``has_access = True`` and an ``access_granted`` entry is added to the
        access log. Afterwards a notification e-mail is sent. SMTP errors and
        address resolution errors are logged and do not undo the approval.

        The SMTP connection is opened in a ``with`` block so that it is
        closed after sending, including when sending fails.
        """
        dataset, _ = utils.parse_dataset(dataset)
        with db.database.atomic():
            dataset = db.get_dataset(dataset)

            user = db.User.select().where(db.User.email == email).get()

            da = db.DatasetAccess.select().where(
                db.DatasetAccess.user == user,
                db.DatasetAccess.dataset == dataset
            ).get()
            da.has_access = True
            da.save()

            db.UserAccessLog.create(
                user=user,
                dataset=dataset,
                action='access_granted'
            )

        try:
            msg = MIMEMultipart()
            msg['to'] = email
            msg['from'] = settings.from_address
            msg['subject'] = 'Swefreq access granted to {}'.format(dataset.short_name)
            msg.add_header('reply-to', settings.reply_to_address)
            # NOTE(review): the line breaks of this text were reconstructed;
            # confirm them against the intended e-mail layout.
            body = (
                "You now have access to the {} dataset\n"
                "\n"
                "Please visit https://swefreq.nbis.se/dataset/{}/download to download files.\n"
            ).format(dataset.full_name, dataset.short_name)
            msg.attach(MIMEText(body, 'plain'))

            # The context manager closes the connection when the block exits.
            with smtplib.SMTP(settings.mail_server) as server:
                server.sendmail(msg['from'], [msg['to']], msg.as_string())
        except (smtplib.SMTPException, socket.gaierror) as e:
            # The approval is already stored; a mail failure is only logged.
            logging.error("Email error: {}".format(e))

        self.finish()
class RevokeUser(handlers.AdminHandler):
    """Records that a user's access to a dataset has been revoked."""

    def post(self, dataset, email):
        """
        Add an ``access_revoked`` log entry for the user with ``email``.

        The dataset and user lookups and the log insert run inside a single
        database transaction.
        """
        dataset, _ = utils.parse_dataset(dataset)
        with db.database.atomic():
            dataset = db.get_dataset(dataset)
            revoked_user = db.User.select().where(db.User.email == email).get()

            db.UserAccessLog.create(
                user=revoked_user,
                dataset=dataset,
                action='access_revoked',
            )
def _build_json_response(query, access_for):
    """
    Build the list of user dictionaries for the dataset-user listings.

    ``access_for`` is called with each user in ``query`` and must return a
    sequence of access rows. Users for which it is empty are skipped and only
    the first row is used. ``applyDate`` is the row's ``access_requested``
    rendered as ``YYYY-MM-DD``, or ``'-'`` when that value is falsy.
    """
    def describe(user, row):
        # Combine one user with its first access row.
        if row.access_requested:
            apply_date = row.access_requested.strftime('%Y-%m-%d')
        else:
            apply_date = '-'
        return {
            'user': user.name,
            'email': user.email,
            'affiliation': user.affiliation,
            'country': user.country,
            'newsletter': row.wants_newsletter,
            'has_access': row.has_access,
            'applyDate': apply_date,
        }

    json_response = []
    for user in query:
        rows = access_for(user)
        if rows:
            json_response.append(describe(user, rows[0]))
    return json_response
class DatasetUsersPending(handlers.AdminHandler):
    """Lists the users with a pending access request for a dataset."""

    def get(self, dataset):
        """
        Reply with ``{'data': [...]}`` built from ``access_pending`` rows.

        All users are prefetched together with the ``DatasetAccessPending``
        rows of the dataset; ``_build_json_response`` skips users without
        such a row.
        """
        dataset, _ = utils.parse_dataset(dataset)
        dataset = db.get_dataset(dataset)

        all_users = db.User.select()
        pending_rows = (db.DatasetAccessPending
                          .select()
                          .where(db.DatasetAccessPending.dataset == dataset))
        users_with_access = peewee.prefetch(all_users, pending_rows)

        def pending_access(user):
            # Access rows attached to the user by the prefetch.
            return user.access_pending

        self.finish({'data': _build_json_response(users_with_access, pending_access)})
class DatasetUsersCurrent(handlers.AdminHandler):
    """Lists the users that currently have access to a dataset."""

    def get(self, dataset):
        """
        Reply with ``{'data': [...]}`` built from ``access_current`` rows.

        All users are prefetched together with the ``DatasetAccessCurrent``
        rows of the dataset; ``_build_json_response`` skips users without
        such a row.
        """
        dataset, _ = utils.parse_dataset(dataset)
        dataset = db.get_dataset(dataset)

        all_users = db.User.select()
        current_rows = (db.DatasetAccessCurrent
                          .select()
                          .where(db.DatasetAccessCurrent.dataset == dataset))
        users_with_access = peewee.prefetch(all_users, current_rows)

        def current_access(user):
            # Access rows attached to the user by the prefetch.
            return user.access_current

        self.finish({'data': _build_json_response(users_with_access, current_access)})
class UserDatasetAccess(handlers.SafeHandler):
    """Lists the datasets the current user has requested or been granted."""

    def get(self):
        """
        Reply with ``{'data': [...]}``, one entry per access row of the user.

        Pending requests come first with ``is_admin`` and ``access`` set to
        False. Current accesses follow with ``access`` set to True and
        ``is_admin`` taken from the access row.
        """
        user = self.current_user

        pending = [
            {
                'short_name': access.dataset.short_name,
                'wants_newsletter': access.wants_newsletter,
                'is_admin': False,
                'access': False,
            }
            for access in user.access_pending
        ]
        granted = [
            {
                'short_name': access.dataset.short_name,
                'wants_newsletter': access.wants_newsletter,
                'is_admin': access.is_admin,
                'access': True,
            }
            for access in user.access_current
        ]

        self.finish({"data": pending + granted})
class SFTPAccess(handlers.SafeHandler):
    """
    Creates, or re-enables, sFTP users in the database.
    """

    def get(self):
        """
        Returns sFTP credentials for the current user.

        The password is never returned here. Users that administer no
        dataset get a reply in which every value is None.
        """
        if db.get_admin_datasets(self.current_user).count() <= 0:
            self.finish({'user': None, 'expires': None, 'password': None})
            return

        reply = {'user': None, 'expires': None, 'password': None}

        # Check if an sFTP user exists for the current user
        try:
            account = self.current_user.sftp_user.get()
        except db.SFTPUser.DoesNotExist:
            # Otherwise return empty values
            pass
        else:
            reply['user'] = account.user_name
            reply['expires'] = account.account_expires.strftime("%Y-%m-%d %H:%M")

        self.finish(reply)

    def post(self):
        """
        Handles generation of new credentials.

        This function either creates a new set of sftp credentials for a
        user, or updates the old ones with a new password and expiry date
        30 days ahead. Users that administer no dataset get a reply in which
        every value is None.
        """
        if db.get_admin_datasets(self.current_user).count() <= 0:
            self.finish({'user': None, 'expires': None, 'password': None})
            return

        # Create a new password
        name_parts = self.current_user.name.split()
        username = "_".join(name_parts) + "_sftp"
        password = self.generate_password()
        expires = datetime.today() + timedelta(days=30)

        # Hex-encoded sha256 digest, expressed with the database functions
        # 'digest' and 'encode'.
        passwd_hash = fn.encode(fn.digest(password, 'sha256'), 'hex')

        # Check if an sFTP user exists for the current user
        try:
            self.current_user.sftp_user.get()
            # if we have a user, update it
            update_query = db.SFTPUser.update(
                password_hash=passwd_hash,
                account_expires=expires,
            ).where(db.SFTPUser.user == self.current_user)
            update_query.execute()
        except db.SFTPUser.DoesNotExist:
            # if there is no user, insert the user in the database
            db.SFTPUser.insert(
                user=self.current_user,
                user_uid=db.get_next_free_uid(),
                user_name=username,
                password_hash=passwd_hash,
                account_expires=expires,
            ).execute()

        self.finish({
            'user': username,
            'expires': expires.strftime("%Y-%m-%d %H:%M"),
            'password': password,
        })

    def generate_password(self, size = 12):
        """
        Generates a password of length 'size', comprised of random lowercase
        and uppercase letters, and numbers.
        """
        alphabet = string.ascii_letters + string.digits
        rng = random.SystemRandom()
        picked = [rng.choice(alphabet) for _ in range(size)]
        return ''.join(picked)