# mfenniak/generate_dialogs.py

## generate_dialogs.py
import pydot
import subprocess
import sys
import re

# Static start of the generated page: the <head> (Bootstrap and jQuery
# includes, CSS that highlights the graph's .node/.edge SVG groups on hover,
# and the inline script that opens the "#explanation_<id>" modal when a node
# or edge is clicked or when the URL hash names one) followed by the opening
# <body> tag.  The matching closing tags are printed at the end of the script.
PAGE_PREAMBLE = """<!DOCTYPE html>
<html>
<head>
    <title>Idempotency Concept Roadmap Prototype</title>
    <link href="https://mathieu.fenniak.net/wp-content/uploads/2013/05/bootstrap.min_.css" rel="stylesheet" media="screen">
    <script type='text/javascript' src='https://mathieu.fenniak.net/wp-includes/js/jquery/jquery.js?ver=1.8.3'></script>
    <script type='text/javascript' src='https://mathieu.fenniak.net/wp-content/plugins/google-analyticator/external-tracking.min.js?ver=6.4.4.2'></script>
    <script type="text/javascript" src="https://mathieu.fenniak.net/wp-content/uploads/2013/05/bootstrap.min_.js"></script>

    <style type="text/css">
        svg, html, body {
            background: #cccccc;
        }
        .node ellipse, .node polygon {
            fill: white;
        }
        .node:hover text {
            fill: green;
            cursor: pointer;
        }
        .node:hover ellipse, .node:hover polygon {
            stroke: green;
            cursor: pointer;
            stroke-width: 3px;
            stroke-linejoin: round;
        }
        .edge:hover path {
            stroke: green;
            cursor: pointer;
            stroke-width: 3px;
        }
        .edge:hover polygon {
            fill: green;
            stroke: green;
            cursor: pointer;
            stroke-width: 3px;
        }
        .explanation {
            display: none;
        }
    </style>
    <script type="text/javascript">
        var analyticsFileTypes = [''];
        var analyticsEventTracking = 'enabled';
        var _gaq = _gaq || [];
        //_gaq.push(['_setAccount', '...']); // removed my own google analytics ID
        _gaq.push(['_trackPageview']);
        (function() {
            var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
            ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
            var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
        })();

        jQuery(function() {

            jQuery("div.modal").on("hide", function() { window.location.hash = ""; });

            jQuery('.node, .edge').click(function(event) {
                var target = jQuery(event.target).parent("g").attr("id");
                var explanation_hash = "#explanation_" + target;
                var explanation = jQuery(explanation_hash);
                if (explanation.length == 1) {
                    _gaq.push(["_trackEvent", "Roadmap", "Open", explanation_hash]);
                    explanation.modal("show");
                    window.location.hash = explanation_hash;
                }
            });

            jQuery(".modal-body a[href]").each(function() {
                var self = jQuery(this);
                jQuery(self).click(function(event) {
                    var href = self.attr("href");
                    if (href.indexOf("#") == 0) {
                        self.parents("div.modal").modal("hide");
                        _gaq.push(["_trackEvent", "Roadmap", "Open", href]);
                        jQuery(href).modal("show");
                    }
                });
            });

            if (window.location.hash.indexOf("#") == 0) {
                var hash = jQuery(window.location.hash);
                if (hash.length != 0) {
                    _gaq.push(["_trackEvent", "Roadmap", "Open", window.location.hash]);
                    hash.modal("show");
                }
            }
        });
    </script>
</head>

<body>
"""

# A single parenthesised expression is printed by the print statement exactly
# as the bare form would be: the text followed by one newline.
print(PAGE_PREAMBLE)

# Explanatory HTML for every node and edge of the roadmap graph.
#
# Keys are the graph element ids with their quotes removed ("node<N>" /
# "edge<N>"); the rest of the script looks elements up here by that id and
# drops the value verbatim into the body of the element's modal dialog, so
# each value must be a well-formed HTML fragment.  Elements without an entry
# are still rendered, but are reported on stderr as missing a comment.
comments = {
    "node1": """
        <p>An idempotent API operation is an operation that produces the same effect no matter how many times it is executed.</p>
        <p>For example, an API operation that sets a user's name is a typically idempotent operation.  Whether it occurs one time or a thousand times, the effect of the operation is that the user's name will be set to the target value.</p>
        <p>Idempotent operations do not necessarily return the exact same response output every time they are invoked, they only need to have the same impact upon the system.  Deleting a resource is an example of this distinction; the first time you invoke the delete, the object is deleted.  The second time you attempt to delete it, there is no change in the state of the system, but the object remains deleted as requested.</p>
    """,
    "node2": """
        <p>Conformance to the HTTP 1.1 specification, <a href="http://tools.ietf.org/html/rfc2616">RFC2616</a>.</p>
    """,
    "node3": """
        <p>Concurrent "edit" API operations will not have one succeed, and then another succeed, causing the changes in the first operation to be erased by the second operation.</p>
    """,
    "node4": """
        <p>Less information will need to be stored and managed by the application.  Reducing server state is particularly relevant to removing information that isn't strictly business oriented, but is kept around only to aid the implementation of services or workflows.</p>
    """,
    "node5": """
        <p>Increased difficulty in developing the software.</p>
    """,
    "node6": """
        <p>The ability to safely retry requests that may or may not have been processed.  A typical temporary failure that would benefit from this capability is a glitch during a high-availability failover, a temporary networking problem, or a small load spike.</p>
    """,
    "node7": """
        <p>More API operations can be processed simultaneously without them conflicting with each other.</p>
    """,
    "node8": """
        <p>Data inconsistency errors are when logical or business constraints are violated by the data already stored and accepted into the system.  These types of errors tend to propagate into other aspects of the system where logic was not designed to handle the data in its inconsistent state.</p>
    """,
    "node9": """
        <p>Availability is essentially the opposite of downtime.  A system is available when clients are able to perform the expected tasks and functions using the system.</p>
    """,
    "node10": """
        <p>Scalability is the ability for a system to handle increased amounts of traffic, customers, and data with linearly increasing costs.</p>
        <p>Not all systems are scalable.  Unscalable systems have bottlenecks or restrictions that need to be solved, or they exhibit exponential growth in cost in relation to a linear growth in customers.  For example, a system is unscalable if adding 1,000 users would result in $10,000 in costs, but the next 1,000 users would result in $100,000 in costs.</p>
    """,
    "node11": """
        <p>The costs associated with testing the system are increased, typically in the area of person time.</p>
    """,
    "node12": """
        <p>The costs associated with maintaining the system and software are increased, typically in the area of developer time.</p>
    """,
    "node13": """
        <p>The costs associated with further product development, such as adding new features and fixing bugs, are increased.  Typically these costs are increased in developer time.</p>
    """,
    "node14": """
        <p>The difficulty of developing an API client that works in the expected manner under a variety of extraneous circumstances is reduced.</p>
    """,
    "node15": """
        <p>The existing server resources used to run the system are utilized at a higher rate in order to accomplish more work without new resources.  This is, of course, only possible if the servers are not already completely utilized to their maximum.</p>
    """,
    "node16": """
        <p>Systems can automatically recover from failure if they're capable of classifying the type of the failure, classifying the type of the request that failed, and determining that there is a path of action that will result in a better outcome than returning an error.</p>
        <p>As an example, a load balancer like HAProxy is capable of redispatching requests if a backend server disconnects while processing a request.</p>
    """,
    "node17": """
        <p>Increased throughput is serving more API operations within the same period of time.</p>
    """,
    "node18": """
        <p>The system is capable of supporting more customers and growing beyond its current workload.</p>
    """,
    "node19": """
        <p>More money is going out the door.</p>
    """,
    "node20": """
        <p>Less time and money is being spent to develop the application system.</p>
    """,
    "node21": """
        <p>The usability or functionality of a potential mobile client, such as an iPhone or Android application, is improved.</p>
    """,
    "node22": """
        <p>Less money is being spent on server hardware and maintenance.</p>
    """,
    "node23": """
        <p>Data entered or manipulated by the customer does not disappear into the void without anyone knowing about it.</p>
    """,
    "node24": """
        <p>API operations performed by the client application take less time to roundtrip and process.</p>
    """,
    "node25": """
        <p>Less money is being made by the business.</p>
    """,
    "node26": """
        <p>Less money is going out the door.</p>
    """,
    "node27": """
        <p>Customers are not pissed off by the way your application works, or doesn't work.  Customer frustration is a very common problem with applications, and it can often occur without anyone knowing about it until it reaches a breaking point where a customer cannot stand anymore.</p>
    """,
    "node28": """
        <p>Fewer customers leaving your application to find alternatives.</p>
    """,
    "node29": """
        <p>More money is coming in from customers.</p>
    """,
    "node30": """
        <p>More money is being made by the business.</p>
    """,
    "edge1": """
        <p>For an HTTP API, idempotency of some HTTP methods is a requirement of <a href="http://tools.ietf.org/html/rfc2616#section-9.1.2">RFC2616 &sect; 9.1.2</a>.</p>
        <p>The HTTP methods GET, HEAD, PUT, and DELETE are required by the specification to be idempotent.  The methods OPTIONS and TRACE are expected to be idempotent by virtue of having no side-effects.</p>
        <p>The method POST is commonly used for any operations that are <strong>not</strong> idempotent.</p>
    """,
    "edge2": """
        <p>Idempotent operations can be safely retried after any type of failure that might be a temporary failure.  If the operation has already been completed successfully but the response was lost, then the idempotency of the operation guarantees that invoking it a second time is safe.  If it had not been completed, then it would be processed as expected.</p>
    """,
    "edge3": """
        <p>By ensuring that it is safe to recover from errors by retrying requests, it becomes far easier to develop a reliable API client.  The client can retry requests without user intervention, and without requiring the user to resync their state and start an operation from scratch.</p>
    """,
    "edge4": """
        <p>As long as it is safe to recover from errors by retrying, it's possible to do so automatically.</p>
    """,
    "edge5": """
        <p>Mobile clients are a notoriously difficult environment to build software in.  Limited CPU, memory, and storage capabilities are just a small part of the challenge; the unreliable networking is the biggest difficulty.  The easier it is to build a reliable API client, the easier it will be to build a high-quality mobile client.</p>
    """,
    "edge6": """
        <p>If your mobile application doesn't suck, customers won't be as frustrated with it.</p>
    """,
    "edge7": """
        <p>There will be cases where a request failure occurs that a client cannot retry.  If you have an outage that affects your system's upstream networking, then the client issuing requests may not be immediately capable of retrying those requests, and other intervening forces may prevent the client from ever retrying that request.</p>
        <p>An automatic failure recovery system can prevent the end-user from losing data that they expected, desired, and required to be entered into your system.</p>
    """,
    "edge8": """
        <p>Even though you're providing an API as a service to your customers, it's likely that you're going to have to develop clients using it too.  In fact, you probably want to, just to "dogfood" your own work.</p>
        <p>Designing your API such that it is easier to develop reliable clients is going to end up reducing your own development efforts in building that client software.</p>
    """,
    "edge9": """
        <p>Since development costs are obviously costs, it only follows that reducing them would result in an overall reduction in costs.</p>
    """,
    "edge10": """
        <p>As profit is generally your revenue minus your costs, all other things being equal, reducing costs will increase profit.</p>
    """,
    "edge11": """
        <p>Often a system that's designed to support idempotent API operations will also prevent conflicting modification operations as well.  It is <strong>not</strong> a strict requirement though, but there is a tendency for one implementation to address both issues.</p>
        <p>A common way to implement an idempotent edit operation would include identifying the state of the resource or object before it's edited, such as with a version number or a timestamp.  In an HTTP API, this can be accomplished with the <a href="http://tools.ietf.org/html/rfc2616#section-14.24">If-Match header</a> using a resource's ETag.  This makes the operation both:</p>
        <dl>
            <dt>Idempotent</dt>
            <dd>It's completely safe for the edit operation to be processed multiple times.  The second or later invocations won't modify the object, but the object will still be in the requested state.</dd>
            <dt>Prevent conflicting edits</dt>
            <dd>Two API request sources attempting to act on the same resource at the same time can't clobber each other; one will succeed, and the second will receive notice that the state has changed since the edit operation was assembled.</dd>
        </dl>
    """,
    "edge12": """
        <p>Conflicting edits being processed can result in data loss.  Preventing conflicting edits will prevent this form of data loss.</p>
        <p>An example of the data loss would be two administrators simultaneously editing a user profile.  The first admin changes the user's e-mail address, while the second admin changes the user's password.  Depending upon the API design, the lack of a mechanism to prevent conflicting edits will cause one of these changes to be disregarded when both updates are submitted to the system simultaneously.  One will be applied, then the second will be applied overwriting the first.</p>
    """,
    "edge13": """
        <p>Conflicting edits being processed can result in an inconsistent data state.  Preventing this requires either preventing conflicting edits, or it would require every API operation to perform validation checks that might be outside the scope of the operation being processed.</p>
        <p>Preventing conflicting edits will prevent some possible occurrences of data inconsistency.</p>
    """,
    "edge14": """
        <p>An alternative to preventing conflicting edits would be to serialize requests, either on a per-server, per-tenant, or per-user basis depending upon what is appropriate for the application.  If this type of serialization can be avoided because conflicting edits are prevented, then request concurrency levels can be higher.</p>
    """,
    "edge15": """
        <p>Preventing data inconsistency will prevent application errors or confusing, nonsensical output in response to API operations.  This will reduce the likelihood of customers being frustrated by these outputs.</p>
    """,
    "edge16": """
        <p>Customers will get frustrated if the impact of their API operations are silently lost because they'll have difficulty figuring out if they're responsible for the errors, or your system is.  Avoiding that scenario is going to reduce the likelihood of customer frustration.</p>
    """,
    "edge17": """
        <p>The ability to serve more requests simultaneously will, all other things being equal, generally increase the number of requests that can be served in a given time period.</p>
        <p>However, there is usually a tipping point at which higher concurrency may result in lower throughput if your system spends too much time managing concurrent requests, or the concurrency level exceeds the threshold of the system.</p>
    """,
    "edge18": """
        <p>Most server hardware is optimized for processing multiple operations at one time.  By improving the concurrency of your API system, you're able to take better advantage of the hardware you have, leading to increased utilization.</p>
    """,
    "edge19": """
        <p>All other things being equal, reducing server costs will reduce the overall costs of running a business.</p>
    """,
    "edge21": """
        <p>Improving the throughput of request processing in your system will reduce the amount of time clients are waiting for API operations to complete.  This should make the responsiveness of your API better, which will allow client applications to accomplish more work and be more responsive.</p>
    """,
    "edge22": """
        <p>Particularly for interactive clients, improving the performance of an API client will reduce the amount of time spent waiting for API requests to be processed, which will alleviate customer frustration.  A typical application of this will be a mobile client; the faster it performs, the more confident users feel.</p>
    """,
    "edge23": """
        <p>Frustrated customers are customers that you're more likely to lose, especially if the cost of migrating to an alternative is relatively low.  Many customers will actually suffer a lot of frustration before taking on a migration project, especially in the B2B service space, but it's usually not a great idea to test this theory intentionally.</p>
    """,
    "edge24": """
        <p>For the vast majority of businesses, customers are the source of revenue.  Losing customers means losing revenue, so preventing customer loss will prevent revenue loss.</p>
    """,
    "edge25": """
        <p>As profit is generally your revenue minus your costs, all other things being equal, increasing revenue will increase profit.</p>
    """,
    "edge26": """
        <p>Designing an API's idempotent operations requires a design where API operations are fully autonomous.  The content of the API request must be logically complete and independent from server-side state in order for requests to be reappliable.  As a result, this style of API design tends to require less server-side state.</p>
        <p>The reduction of server-side state is particularly true when compared to distributed transaction coordination, which is sometimes considered an alternative to an idempotent API design.  Distributed transactions require very coordinated shared state between multiple application servers; all components of that state are completely absent in an idempotent API.</p>
    """,
    "edge27": """
        <p>Shared server state is a common bottleneck in scaling applications.  Instead of scaling one component, such as the system's application servers, there is now a requirement to scale two systems simultaneously, as you'll need to scale your shared state services at the same time.</p>
        <p>By reducing the amount of shared server state, you eliminate a scaling hurdle entirely.</p>
    """,
    "edge28": """
        <p>Shared server state is often stored in a separate system than your traditional data store because it requires different access patterns and a higher write workload.  As a result, the use of a shared server state storage is an additional point-of-failure for the system.  By reducing or removing shared server state, this point-of-failure is eliminated as well.</p>
    """,
    "edge29": """
        <p>As long as you have the ability to scale your system, you have the ability to support more customers.</p>
    """,
    "edge30": """
        <p>As customers are most businesses' source of revenue, the ability to support more customers will correspond to the ability to generate more revenue.</p>
    """,
    "edge31": """
        <p>Most API operations are easy to design and develop in an idempotent manner.  But, there are always exceptions, and some API operations will be more difficult to develop in this way, and therefore will result in a longer implementation time and a higher development cost.</p>
        <p>A simple example of a more difficult idempotent operation is the idempotent creation of objects or resources, particularly those that do not have natural primary keys.</p>
    """,
    "edge32": """
        <p>Increasing development costs will increase overall costs.</p>
    """,
    "edge33": """
        <p>Since profit is generally revenue minus costs, an increase in costs will result in a decrease in profit.</p>
    """,
    "edge34": """
        <p>Although for many API operations idempotency is just a matter of design, for some it will increase the complexity of implementing the operation.  The operation may be required to compare the expected state of objects or resources with the current state to verify that the operation has not already been processed, for example.</p>
    """,
    "edge35": """
        <p>Maintenance expenses are usually unavoidable on any development project.  Additional features, tweaks, and bug fixes are the obvious maintenance costs, but every line of code in an application is a maintenance cost waiting to happen.</p>
        <p>Complex implementations, even if implemented flawlessly, are more difficult and expensive to maintain in the future.  Development staff will have increased training and ramp-up time to work on complex systems, and higher levels of quality control will be required for maintenance work relative to a simpler system.</p>
    """,
    "edge36": """
        <p>Complex implementations tend to be more difficult to accurately and completely test.  There are additional costs involved in communicating the complexity between departments, and time spent developing, verifying, and implementing test plans.</p>
    """,
    "edge37": """
        <p>As maintenance costs are still costs, increasing them will increase your overall costs.</p>
    """,
    "edge38": """
        <p>As testing costs are still costs, increasing them will increase your overall costs.</p>
    """,
    "edge39": """
        <p>Some developers using your API will have expectations about its behavior based upon the HTTP specification.  When there's a mismatch between your API and their expectations, it can be a source of confusion and frustration.</p>
        <p>Specification expectations can also be coded into tools, frameworks, proxies, and other intermediaries between developers and your API.  As an example, it would be spec conforming for an API framework to reissue PUT or DELETE requests automatically if they fail, based upon the expectation that these methods will be idempotent as per the HTTP spec.</p>
    """,
    "edge40": """
        <p>Increasing the utilization of server resources will reduce any premature hardware acquisition costs.</p>
    """,
    "edge41": """
        <p>Customers get frustrated when the systems they're trying to use are not available.  Increasing availability will reduce the potential for this frustration.</p>
    """,
}

# Running count of graph nodes/edges that have no entry in `comments`; it is
# incremented while the dialogs are generated and reported once at the end.
comments_missing = 0

# Parse the graph description.  The file is opened in a `with` block so the
# handle is closed as soon as its text has been read instead of being left
# for the garbage collector.
with open("idempotent.dot") as dot_file:
    data = pydot.dot_parser.parse_dot_data(dot_file.read())

# Order the nodes by the number embedded in their id so dialogs are emitted in
# a stable, numeric order.  The [5:-1] slice presumably strips a leading quote
# plus a four-letter prefix and the trailing quote from ids shaped like
# "node12" -- confirm against idempotent.dot.  Nodes without an id sort first.
nodes = data.get_nodes()
nodes.sort(key=lambda n: int(n.get_id()[5:-1]) if n.get_id() is not None else 0)

def _flat_label(graph_node):
    """Return a node's label with quote characters removed and literal backslash-n breaks turned into spaces."""
    return graph_node.get_label().replace('"', '').replace("\\n", " ")


def _print_link_paragraph(heading, related_edges, far_end_of):
    """Print one line of links to the explanations of the given edges.

    heading       -- text that opens the line, including its "<p>" tag.
    related_edges -- pydot edges to list; nothing is printed when it is empty.
    far_end_of    -- callable taking an edge and returning the name of the
                     node whose label is used as the link text.

    Each link points at the *edge's* explanation dialog while showing the
    label of the node at the edge's other end.  Links are ordered by that
    node's raw label.  Node names are resolved through the module-level graph
    `data`.
    """
    if not related_edges:
        return

    def far_node(edge):
        return data.get_node(far_end_of(edge))[0]

    ordered = sorted(related_edges, key=lambda edge: far_node(edge).get_label())
    final = len(ordered) - 1

    pieces = [heading]
    for position, related in enumerate(ordered):
        pieces.append("""<a href="#explanation_%(id)s">%(label)s</a>%(end)s""" % {
            "id": related.get_id().replace('"', ''),
            "label": _flat_label(far_node(related)),
            # Punctuation is chosen by position rather than by comparing the
            # edge with the last one: edges that compare equal (e.g. parallel
            # edges between the same two nodes) would otherwise all be
            # treated as the final item.
            "end": "." if position == final else ", ",
        })
    pieces.append("</p>")

    # Joining with single spaces yields the same text that a run of
    # comma-terminated print statements would have produced.
    print(" ".join(pieces))


# Emit one modal dialog per graph node: a header with the node's label, the
# hand-written explanation from `comments` (when there is one), and link lines
# to the edges that leave from or arrive at the node.
for node in nodes:
    name = node.get_name()
    # Presumably the dot file's default-attribute statement ("node [...]"),
    # which shows up in the node list but is not a real graph node --
    # confirm against idempotent.dot.
    if name == "node":
        continue

    node_id = node.get_id().replace('"', '')

    # NOTE(review): every dialog reuses the element id "myModalLabel".
    print("""
    <div id="explanation_%(id)s" class="modal hide fade">
        <div class="modal-header">
            <button type="button" class="close" data-dismiss="modal" aria-hidden="true">&times;</button>
            <h3 id="myModalLabel">%(label)s</h3>
        </div>
        <div class="modal-body">""" % {
        "id": node_id,
        "label": _flat_label(node),
    })

    if node_id in comments:
        print(comments[node_id])
    else:
        comments_missing += 1
        sys.stderr.write("Node %s doesn't have any comment yet.\n" % node_id)

    # Classify every edge touching this node: outgoing red edges are
    # trade-offs, other outgoing edges are benefits, and incoming edges are
    # influences.
    benefits = []
    influenced_by = []
    tradeoffs = []
    for e in data.get_edges():
        if e.get_source() == name:
            if e.get_color() == "red":
                tradeoffs.append(e)
            else:
                benefits.append(e)
        elif e.get_destination() == name:
            influenced_by.append(e)

    _print_link_paragraph("            <p>Benefits:", benefits,
                          lambda edge: edge.get_destination())
    _print_link_paragraph("            <p>Influenced by:", influenced_by,
                          lambda edge: edge.get_source())
    if name == "idempotency":
        tradeoff_heading = "            <p>Trade-offs:"
    else:
        tradeoff_heading = "            <p>Is a cause of:"
    _print_link_paragraph(tradeoff_heading, tradeoffs,
                          lambda edge: edge.get_destination())

    print("""        </div>
    </div>""")


# Emit one modal dialog per graph edge, in numeric id order.  The [5:-1] slice
# presumably strips the quote and four-letter prefix from ids shaped like
# "edge12" -- confirm against idempotent.dot.  Edges without an id sort first.
edges = data.get_edges()
edges.sort(key=lambda n: int(n.get_id()[5:-1]) if n.get_id() is not None else 0)

for edge in edges:
    edge_id = edge.get_id().replace('"', '')
    if edge_id in comments:
        comment = comments[edge_id]
    else:
        # The dialog is still generated, just without explanatory text.
        comment = ""
        comments_missing += 1
        sys.stderr.write("Edge %s doesn't have any comment yet.\n" % edge_id)

    # Resolve both endpoints once; their ids and labels are each used in the
    # dialog title and again in the "Links:" line.
    source_node = data.get_node(edge.get_source())[0]
    destination_node = data.get_node(edge.get_destination())[0]

    # NOTE(review): every dialog reuses the element id "myModalLabel".
    print("""
    <div id="explanation_%(id)s" class="modal hide fade">
        <div class="modal-header">
            <button type="button" class="close" data-dismiss="modal" aria-hidden="true">&times;</button>
            <h3 id="myModalLabel">%(source_label)s -> %(destination_label)s</h3>
        </div>
        <div class="modal-body">
            %(comments)s
            <p>Links: <a href="#explanation_%(source_id)s">%(source_label)s</a> and <a href="#explanation_%(destination_id)s">%(destination_label)s</a>.</p>
        </div>
    </div>""" % {
        "id": edge_id,
        "comments": comment,
        "source_id": source_node.get_id().replace('"', ''),
        "source_label": source_node.get_label().replace('"', '').replace("\\n", " "),
        "destination_id": destination_node.get_id().replace('"', ''),
        "destination_label": destination_node.get_label().replace('"', '').replace("\\n", " "),
    })

# Render the graph to SVG with Graphviz and inline it into the page.
proc = subprocess.Popen(["dot", "-Tsvg", "idempotent.dot"], stdout=subprocess.PIPE)
# Only stdout is piped, so stderrdata is always None and any Graphviz
# diagnostics go straight to this script's stderr.
stdoutdata, stderrdata = proc.communicate()
if proc.returncode != 0:
    # Without this the page would be written with a missing or truncated
    # graph and no indication of why.
    sys.stderr.write("dot exited with status %s; the embedded SVG may be missing or incomplete.\n" % proc.returncode)

# Matches the <title> elements in the SVG output; they are removed from the
# embedded copy (presumably to avoid browser tooltips on hover -- confirm).
title_regex = re.compile(r"<title>[^<]+</title>")

# Everything before the opening <svg ...> line is dropped so only the <svg>
# element itself lands inside the HTML body.  That opening line is printed
# untouched; every later line is printed with its <title> elements removed.
start = False
for line in stdoutdata.split("\n"):
    if start:
        print(title_regex.sub("", line))
    elif line.startswith("<svg"):
        start = True
        print(line)

# Close the <body> and <html> elements opened at the top of the page.
print("""
</body>
</html>
""")

# Final reminder on stderr of how many nodes/edges still lack an explanation;
# silent when every element has one.
if comments_missing:
    sys.stderr.write("%s comments missing.  More work to do!\n" % comments_missing)