# Source code for ckan_api_client.tests.utils.generate

#!/usr/bin/env python

# Generate some dummy data, for testing purposes

import json
import random
import string

from .strings import gen_random_id, gen_picture


def generate_organization():
    """
    Generate a random organization object, with:

    - ``name``, random, example: ``"org-abc123"`` (used as key)
    - ``title``, random, example: ``"Organization abc123"``
    - ``description``, random
    - ``image_url``, url pointing to a random-generated pic

    All the fields are derived from the same random id, so they can
    be matched with each other.

    :return: a dict with the fields listed above
    """

    random_id = gen_random_id(10)
    return {
        "name": "org-{0}".format(random_id),  # Used as key
        "title": "Organization {0}".format(random_id),
        "description": "Description of organization {0}".format(random_id),
        "image_url": gen_picture(random_id),
        # "extras": [],
        # "tags": [],
    }


def generate_group():
    """
    Generate a random group object, with:

    - ``name``, random, example: ``"grp-abc123"`` (used as key)
    - ``title``, random, example: ``"Group abc123"``
    - ``description``, random
    - ``image_url``, url pointing to a random-generated pic

    All the fields are derived from the same random id, so they can
    be matched with each other.

    :return: a dict with the fields listed above
    """

    random_id = gen_random_id(10)
    return {
        "name": "grp-{0}".format(random_id),  # Used as key
        "title": "Group {0}".format(random_id),
        "description": "Description of group {0}".format(random_id),
        "image_url": gen_picture(random_id),
        # "extras": [],
        # "tags": [],
    }


def generate_dataset():
    """
    Generate a dataset, populated with random data.

    **Fields:**

    - ``name`` -- random string, in the form ``dataset-{random}``
    - ``title`` -- random string, in the form ``Dataset {random}``

    - ``author`` -- random-generated name
    - ``author_email`` -- random-generated email address
    - ``license_id`` -- random license id. One of ``cc-by``, ``cc-zero``,
      ``cc-by-sa`` or ``notspecified``.
    - ``maintainer`` -- random-generated name
    - ``maintainer_email`` -- random-generated email address
    - ``notes`` -- random string, containing some markdown
    - ``owner_org`` -- set to None
    - ``private`` -- Fixed to ``False``
    - ``tags`` -- random list of tags (strings)
    - ``type`` -- fixed string: ``"dataset"``
    - ``url`` -- random url of dataset on an "external source"

    - ``extras`` -- dictionary containing random key / value pairs
    - ``groups`` -- empty list
    - ``resources`` -- list of random resources (between 1 and 8)
    - ``relationships`` -- empty list

    .. note::
        The ``owner_org`` and ``groups`` fields will be blank,
        as they must match with existing groups / organizations
        and we don't have access to database from here (nor
        is it in the scope of this function!)

    .. note::
        No ``id`` field is generated: the dataset is identified
        by its ``name``.

    :return: a dict with the fields listed above
    """

    random_id = gen_random_id(15)
    license_id = random.choice((
        'cc-by', 'cc-zero', 'cc-by-sa', 'notspecified'))

    # ``range`` (not ``xrange``) so this runs on both Python 2 and 3
    resources = [
        generate_resource()
        for _ in range(random.randint(1, 8))
    ]

    return {
        # ------------------------------------------------------------
        # WARNING! This is the **internal** id of the external
        # service, which will need to be moved to
        # dataset['extras']['_harvest_source_id']
        # ------------------------------------------------------------
        # "id": random_id,

        # Name should be taken as a "suggestion": in case of naming conflict
        # with an existing dataset, it must be changed (todo: how?)
        "name": "dataset-{0}".format(random_id),

        "title": "Dataset {0}".format(random_id),
        "url": "http://www.example.com/dataset/{0}".format(random_id),
        "type": "dataset",

        "maintainer_email": "maintainer-{0}@example.com".format(random_id),
        "maintainer": "Maintainer {0}".format(random_id),

        "author_email": "author-{0}@example.com".format(random_id),
        "author": "Author {0}".format(random_id),

        "license_id": license_id,

        "private": False,
        "notes": "Notes for **dataset** {0}.".format(random_id),

        # "state": "active",  # automatic

        # Let's generate some tags
        "tags": generate_tags(random.randint(0, 10)),

        # Let's put some random stuff in here..
        "extras": generate_extras(random.randint(0, 30)),

        # Some dummy resources
        "resources": resources,

        # Need to be randomized later, to match existing groups
        "groups": [],

        # Need to be randomized later, to match existing orgs
        "owner_org": None,

        # Always left empty by this generator
        "relationships": [],
    }


def generate_resource():
    """
    Generate a random resource, to be put in a dataset.

    **Fields:**

    - ``url`` -- resource URL on an "external source", ending with
      the lower-case format as extension
    - ``resource_type`` -- one of ``api`` or ``file``
    - ``name`` -- random-generated name
    - ``format`` -- a random format, upper-case: ``CSV`` or ``JSON``
    - ``description`` -- random generated string

    :return: a dict with the fields listed above
    """

    random_id = gen_random_id()
    fmt = random.choice(['csv', 'json'])
    url = 'http://example.com/resource/{0}.{1}'.format(random_id, fmt)
    return {
        "url": url,
        "resource_type": random.choice(['api', 'file']),
        "name": "resource-{0}".format(random_id),
        # The extension in the URL is lower-case, the format upper-case
        "format": fmt.upper(),
        "description": "Resource {0}".format(random_id),
    }


def generate_tags(amount):
    """
    Generate ``amount`` random tags.
    Each tag is in the form ``tag-<random-int>``, with the number
    between 0 and 50 (inclusive), zero-padded to three digits.

    .. note::
        Tags are drawn independently, so the returned list
        may contain duplicates.

    :param amount: number of tags to generate
    :return: a list of tag names
    """

    # ``range`` (not ``xrange``) so this runs on both Python 2 and 3
    return [
        'tag-{0:03d}'.format(random.randint(0, 50))
        for _ in range(amount)
    ]


def generate_extras(amount):
    """
    Generate a dict with up to ``amount`` random key/value pairs.

    Keys are in the form ``key-NNN`` and values in the form
    ``value NNN``, with ``NNN`` a zero-padded number between
    0 and 50 (inclusive).

    .. note::
        Keys are drawn independently: when the same key is drawn
        more than once, the last value wins, so the returned dict
        may contain fewer than ``amount`` items.

    :param amount: number of key/value pairs to draw
    :return: a dict of random extras
    """
    extras = {}
    # ``range`` (not ``xrange``) so this runs on both Python 2 and 3
    for _ in range(amount):
        # Draw the key first, then the value
        key = 'key-{0:03d}'.format(random.randint(0, 50))
        value = 'value {0:03d}'.format(random.randint(0, 50))
        extras[key] = value
    return extras


def generate_data(dataset_count=50, orgs_count=10, groups_count=15):
    """
    Generate a bunch of random data.
    Will also associate datasets with random organizations / groups.

    Each dataset gets between 1 and 5 groups (picked independently,
    so the same group may appear more than once) and one owner
    organization. If no groups / organizations were generated,
    the ``groups`` / ``owner_org`` fields are left blank
    (``[]`` / ``None``).

    :param dataset_count: number of datasets to generate
    :param orgs_count: number of organizations to generate
    :param groups_count: number of groups to generate
    :return: a dict with the ``dataset``, ``organization`` and
        ``group`` keys; each of them a dict of ``{key: object}``,
        where the key is the object's ``name``.
    """

    data = {'dataset': {}, 'organization': {}, 'group': {}}

    for _ in range(orgs_count):
        org = generate_organization()
        data['organization'][org['name']] = org

    for _ in range(groups_count):
        group = generate_group()
        data['group'][group['name']] = group

    # random.choice() needs a real sequence: materialize the keys once,
    # as dict views are not indexable on Python 3.
    org_names = list(data['organization'])
    group_names = list(data['group'])

    for _ in range(dataset_count):
        dataset = generate_dataset()

        # Only pick from non-empty pools; random.choice() raises
        # IndexError on an empty sequence.
        if group_names:
            dataset['groups'] = [
                random.choice(group_names)
                for _ in range(random.randint(1, 5))
            ]
        else:
            dataset['groups'] = []

        if org_names:
            dataset['owner_org'] = random.choice(org_names)
        else:
            dataset['owner_org'] = None

        # Datasets have no "id" field: use the name as key, as done
        # for organizations and groups.
        data['dataset'][dataset['name']] = dataset

    return data


def generate_id(length=10):
    """
    Generate a random identifier, made of lower-case ASCII letters
    and digits.

    :param length: number of characters of the identifier
    :return: a random string of ``length`` characters
    """
    pool = string.ascii_lowercase + string.digits
    # ``range`` (not ``xrange``) so this runs on both Python 2 and 3
    return ''.join(random.choice(pool) for _ in range(length))


if __name__ == '__main__':
    # Script usage: write the generated data as JSON files, one file per
    # object, in ``<destdir>/<type>/<key>``. The destination directory is
    # the first command line argument (default: current directory) and
    # must be empty (it is created if missing).
    import os
    import sys

    if len(sys.argv) > 1:
        destdir = sys.argv[1]
    else:
        destdir = os.getcwd()
    destdir = os.path.abspath(destdir)

    if not os.path.exists(destdir):
        os.makedirs(destdir)
    if not os.path.isdir(destdir):
        raise ValueError("Not a directory: {0}".format(destdir))
    if os.listdir(destdir):
        # Refuse to mix generated files with existing content
        raise ValueError("Directory not empty: {0}".format(destdir))

    print("Generating data")
    data = generate_data()

    for n in ('dataset', 'group', 'organization'):
        print("Writing {0}".format(n))
        # destdir is absolute, so there is no need to chdir into it
        typedir = os.path.join(destdir, n)
        os.makedirs(typedir)
        # ``items()`` (not ``iteritems()``) works on both Python 2 and 3
        for key, val in data[n].items():
            print("    * {0} {1}".format(n, key))
            with open(os.path.join(typedir, key), 'w') as f:
                json.dump(val, f)