jake-low/intro.md

## intro.md

      
    Raw
  

              intro.md
            
          
    This gist is a demonstration of an inconsistency between the google/protobuf Python and C++ implementations (in protobuf release version 2.5.0).
The inconsistency is in the parsing of protobuf messages from strings. In C++, attempting to parse a message with one or more required fields fails if the input string is empty. In Python, a message can always be parsed from the empty string, even if it has required fields.
Below you'll find a simple program. A Teacher object and a Student object are instantiated. The teacher administers a simple test to the student. They communicate via serialized protobuf messages. First, the teacher serializes a real MultipleChoiceQuestion message and the student replies to it. Then, the teacher attempts to "trick" the student by having them attempt to decode a MultipleChoiceQuestion from an empty string, instead of from a buffer containing a serialized message. The teacher checks whether the student "notices" that the message is empty (i.e. whether parsing fails) -- the student refuses to answer a question that can't be parsed.
In the C++ version of the program, the student can't parse a MultipleChoiceQuestion message from the empty string, and therefore refuses to reply with a MultipleChoiceAnswer (returning NULL instead).
In the Python version of the program, when the student tries to parse a MultipleChoiceQuestion message from the empty string, the parse succeeds. Because no error is raised, the student doesn't realize that the question is missing its required field (although they could detect this by calling IsInitialized(), which would return False). Therefore the student answers the teacher's "trick" question.

  
## multiplechoice.proto
// A multiple-choice question that the teacher serializes and hands to the
// student.
message MultipleChoiceQuestion {
    // Text of the question. Required: in the C++ run shown in this gist,
    // parsing an empty buffer fails because this field is missing.
    required string question = 2;
    // The answer options offered to the student.
    repeated string choices = 3;
    // Optional hint.
    optional string hint = 4;
}

// The student's reply to a MultipleChoiceQuestion.
message MultipleChoiceAnswer {
    // Selected option. The programs in this gist store the letter's offset
    // from 'a' here (so 'b' is stored as 1).
    required uint32 choice = 2;
}

## test.cpp
#include "multiplechoice.pb.h"

#include <cassert>
#include <memory>
#include <stdio.h>
#include <string>
#include <vector>

/* Converts a character to its offset from 'a' (ASCII 97): 'a' -> 0,
 * 'b' -> 1, and so on.
 */
int charToIndex(char c)
{
    const int code_of_a = 97;
    const int code = static_cast<int>(c);
    return code - code_of_a;
}

/* Converts an offset from 'a' (ASCII 97) back to a character: 0 -> 'a',
 * 1 -> 'b', and so on.
 */
char indexToChar(int i)
{
    const int code_of_a = 97;
    const int code = code_of_a + i;
    return static_cast<char>(code);
}

/* A test-taker that answers serialized MultipleChoiceQuestion messages. */
class Student
{
public:
    /* Takes a serialized question, parses it, creates a
     * MultipleChoiceAnswer proto, and returns the serialized proto.
     *
     * Returns a null pointer (after printing a complaint) when
     * serialized_question cannot be parsed as a MultipleChoiceQuestion.
     * Otherwise the answer is always choice 'b'.
     */
    std::unique_ptr<std::string> answerQuestion(std::string serialized_question)
    {
        MultipleChoiceQuestion question_proto;

        if (!question_proto.ParseFromString(serialized_question))
        {
            printf("Student: hey, that's not a real question!\n");
            return nullptr;
        }

        MultipleChoiceAnswer answer_proto;
        answer_proto.set_choice(charToIndex('b'));

        printf("Student: umm, 'b'?\n");

        // The buffer belongs to the unique_ptr from the moment it is
        // allocated, so it is released even if serialization throws.
        std::unique_ptr<std::string> out(new std::string);

        const bool serialized = answer_proto.SerializeToString(out.get());
        assert(serialized);
        (void) serialized; // only read by assert(); avoids an unused warning under NDEBUG

        return out;
    }
};

/* Builds serialized questions and uses them to quiz a Student. */
class Teacher
{
public:
    /*
     * Takes a question string, a list of choices (strings) and an optional
     * hint (a null pointer means "no hint"), creates a
     * MultipleChoiceQuestion, and returns the serialized question.
     */
    std::unique_ptr<std::string> makeQuestion(
        std::string question,
        std::vector<std::string> choices,
        std::string * hint)
    {
        MultipleChoiceQuestion question_proto;
        question_proto.set_question(question);

        for (const std::string & choice : choices)
        {
            question_proto.add_choices(choice);
        }

        if (hint)
        {
            question_proto.set_hint(*hint);
        }

        // The buffer belongs to the unique_ptr from the moment it is
        // allocated, so it is released even if serialization throws.
        std::unique_ptr<std::string> out(new std::string);

        const bool serialized = question_proto.SerializeToString(out.get());
        assert(serialized);
        (void) serialized; // only read by assert(); avoids an unused warning under NDEBUG

        return out;
    }

    /* Test the student to see if they can recognize an invalid (empty string)
     * protobuf.
     *
     * First asks a real question and reports the answer, then hands the
     * student an empty string and reports whether they refused to answer.
     */
    void administerTest(Student & student)
    {
        printf("Teacher: let's begin the test.\n");

        std::unique_ptr<std::string> serialized_question = makeQuestion(
            "What's your favorite colour?",
            {"red", "yellow", "green", "blue"},
            nullptr);

        printf("Teacher: what's your favorite colour: (a) red, (b) yellow, "
            "(c) green, or (d) blue?\n");

        std::unique_ptr<std::string> serialized_answer =
            student.answerQuestion(*serialized_question);

        if (!serialized_answer)
        {
            printf("Teacher: the student refused to answer the first "
                "question.\n");
        }
        else
        {
            MultipleChoiceAnswer answer;

            // Check the parse result instead of reading a choice out of a
            // message that failed to parse.
            if (!answer.ParseFromString(*serialized_answer))
            {
                printf("Teacher: I couldn't read the student's answer to the "
                    "first question.\n");
            }
            else
            {
                printf("Teacher: the student answered '%c' to the first "
                    "question.\n", indexToChar(answer.choice()));
            }
        }

        printf("Teacher: now I'll ask the student a question that's not even "
            "a real protobuf.\n");

        // The "trick" question: an empty buffer instead of a serialized
        // MultipleChoiceQuestion.
        serialized_answer = student.answerQuestion("");

        if (!serialized_answer)
        {
            printf("Teacher: the student refused to answer a bad question.\n");
        }
        else
        {
            printf("Teacher: the student answered the fake question!\n");
        }
    }
};

int main()
{
    Teacher teacher;
    Student student;

    teacher.administerTest(student);
}

## test.cpp.output
Teacher: let's begin the test.
Teacher: what's your favorite colour: (a) red, (b) yellow, (c) green, or (d) blue?
Student: umm, 'b'?
Teacher: the student answered 'b' to the first question.
Teacher: now I'll ask the student a question that's not even a real protobuf.
[libprotobuf ERROR google/protobuf/message_lite.cc:123] Can't parse message of type "MultipleChoiceQuestion" because it is missing required fields: question
Student: hey, that's not a real question!
Teacher: the student refused to answer a bad question.

## test.py
import google.protobuf
import multiplechoice_pb2

def letterToIndex(letter):
    """Return the offset of `letter` from 'a': 'a' -> 0, 'b' -> 1, ..."""
    base = ord('a')  # 97
    return ord(letter) - base

def indexToLetter(index):
    """Return the letter at offset `index` from 'a': 0 -> 'a', 1 -> 'b', ..."""
    base = ord('a')  # 97
    return chr(base + index)

class Student(object):
    """A test-taker that answers serialized MultipleChoiceQuestion messages."""

    def answerQuestion(self, serialized_question):
        """
        Takes a serialized question, parses it, creates a MultipleChoiceAnswer
        proto, and returns the serialized proto.

        Returns None (after printing a complaint) if parsing raises
        DecodeError.
        """
        # Imported explicitly: a bare `import google.protobuf` does not by
        # itself guarantee that the `message` submodule has been loaded.
        from google.protobuf.message import DecodeError

        question_proto = multiplechoice_pb2.MultipleChoiceQuestion()

        try:
            question_proto.ParseFromString(serialized_question)
        except DecodeError:
            print("Student: hey, that's not a real question!")
            return None

        # No DecodeError was raised, so we treat the question as valid. This
        # is deliberately the only check: an empty string gets this far even
        # though `question` is a required field, which is the behaviour this
        # program demonstrates. We didn't study for this test, so we're just
        # gonna go ahead and answer 'b' and hope for the best.

        answer_proto = multiplechoice_pb2.MultipleChoiceAnswer()
        answer_proto.choice = letterToIndex('b')
        print("Student: ummm, 'b'?")

        return answer_proto.SerializeToString()

class Teacher(object):
    """Builds serialized questions and uses them to quiz a Student."""

    def makeQuestion(self, question, choices, hint=None):
        """
        Takes a question string and a list of choices (strings), creates a
        MultipleChoiceQuestion, and returns the serialized question.

        `hint` is only set on the message when it is not None.
        """
        question_proto = multiplechoice_pb2.MultipleChoiceQuestion()
        question_proto.question = question

        for choice in choices:
            question_proto.choices.append(choice)

        if hint is not None:
            question_proto.hint = hint

        return question_proto.SerializeToString()

    def administerTest(self, student):
        """
        Test the student to see if they can recognize an invalid (empty string)
        protobuf.

        First asks a real question and reports the answer, then hands the
        student an empty string and reports whether they refused to answer.
        """

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("Teacher: let's begin the test.")

        serialized_question = self.makeQuestion("What's your favorite colour?",
            ['red', 'yellow', 'green', 'blue'])

        print("Teacher: what's your favorite colour: (a) red, (b) yellow, "
              "(c) green, or (d) blue?")

        serialized_answer = student.answerQuestion(serialized_question)

        if serialized_answer is None:
            print("Teacher: the student refused to answer the first question.")
        else:
            answer = multiplechoice_pb2.MultipleChoiceAnswer()
            answer.ParseFromString(serialized_answer)

            print("Teacher: the student answered %r to the first question."
                  % indexToLetter(answer.choice))

        # now we'll try to trick the student by giving them an empty string
        # instead of a real question

        print("Teacher: now I'll ask the student a question that's not even "
              "a real protobuf.")

        # b"" is the same object type and value as "" on Python 2, and is the
        # bytes type that serialized messages have on Python 3.
        serialized_question = b""

        serialized_answer = student.answerQuestion(serialized_question)

        if serialized_answer is None:
            print("Teacher: the student refused to answer a bad question")
        else:
            print("Teacher: the student answered the fake question!")

if __name__ == '__main__':
    # Run the demonstration: a fresh Teacher tests a fresh Student.
    Teacher().administerTest(Student())

## test.py.output
Teacher: let's begin the test.
Teacher: what's your favorite colour: (a) red, (b) yellow, (c) green, or (d) blue?
Student: ummm, 'b'?
Teacher: the student answered 'b' to the first question.
Teacher: now I'll ask the student a question that's not even a real protobuf.
Student: ummm, 'b'?
Teacher: the student answered the fake question!
	// A multiple-choice question that the teacher serializes and hands to the
	// student.
	message MultipleChoiceQuestion {
	// Text of the question (required).
	required string question = 2;
	// The answer options offered to the student.
	repeated string choices = 3;
	// Optional hint.
	optional string hint = 4;
	}

	// The student's reply to a MultipleChoiceQuestion.
	message MultipleChoiceAnswer {
	// Selected option (required).
	required uint32 choice = 2;
	}
	#include "multiplechoice.pb.h"

	#include <cassert>
	#include <memory>
	#include <stdio.h>
	#include <string>

	/* Converts a character to its offset from 'a' (ASCII 97): 'a' -> 0,
	 * 'b' -> 1, and so on.
	 */
	int charToIndex(char c)
	{
	    const int code_of_a = 97;
	    const int code = static_cast<int>(c);
	    return code - code_of_a;
	}

	/* Converts an offset from 'a' (ASCII 97) back to a character: 0 -> 'a',
	 * 1 -> 'b', and so on.
	 */
	char indexToChar(int i)
	{
	    const int code_of_a = 97;
	    const int code = code_of_a + i;
	    return static_cast<char>(code);
	}

	/* A test-taker that answers serialized MultipleChoiceQuestion messages. */
	class Student
	{
	public:
	    /* Takes a serialized question, parses it, creates a
	     * MultipleChoiceAnswer proto, and returns the serialized proto.
	     *
	     * Returns a null pointer (after printing a complaint) when
	     * serialized_question cannot be parsed as a MultipleChoiceQuestion.
	     * Otherwise the answer is always choice 'b'.
	     */
	    std::unique_ptr<std::string> answerQuestion(std::string serialized_question)
	    {
	        MultipleChoiceQuestion question_proto;

	        if (!question_proto.ParseFromString(serialized_question))
	        {
	            printf("Student: hey, that's not a real question!\n");
	            return nullptr;
	        }

	        MultipleChoiceAnswer answer_proto;
	        answer_proto.set_choice(charToIndex('b'));

	        printf("Student: umm, 'b'?\n");

	        // The buffer belongs to the unique_ptr from the moment it is
	        // allocated, so it is released even if serialization throws.
	        std::unique_ptr<std::string> out(new std::string);

	        const bool serialized = answer_proto.SerializeToString(out.get());
	        assert(serialized);
	        (void) serialized; // only read by assert(); avoids an unused warning under NDEBUG

	        return out;
	    }
	};

	/* Builds serialized questions and uses them to quiz a Student. */
	class Teacher
	{
	public:
	    /*
	     * Takes a question string, a list of choices (strings) and an optional
	     * hint (a null pointer means "no hint"), creates a
	     * MultipleChoiceQuestion, and returns the serialized question.
	     */
	    std::unique_ptr<std::string> makeQuestion(
	        std::string question,
	        std::vector<std::string> choices,
	        std::string * hint)
	    {
	        MultipleChoiceQuestion question_proto;
	        question_proto.set_question(question);

	        for (const std::string & choice : choices)
	        {
	            question_proto.add_choices(choice);
	        }

	        if (hint)
	        {
	            question_proto.set_hint(*hint);
	        }

	        // The buffer belongs to the unique_ptr from the moment it is
	        // allocated, so it is released even if serialization throws.
	        std::unique_ptr<std::string> out(new std::string);

	        const bool serialized = question_proto.SerializeToString(out.get());
	        assert(serialized);
	        (void) serialized; // only read by assert(); avoids an unused warning under NDEBUG

	        return out;
	    }

	    /* Test the student to see if they can recognize an invalid (empty string)
	     * protobuf.
	     *
	     * First asks a real question and reports the answer, then hands the
	     * student an empty string and reports whether they refused to answer.
	     */
	    void administerTest(Student & student)
	    {
	        printf("Teacher: let's begin the test.\n");

	        std::unique_ptr<std::string> serialized_question = makeQuestion(
	            "What's your favorite colour?",
	            {"red", "yellow", "green", "blue"},
	            nullptr);

	        printf("Teacher: what's your favorite colour: (a) red, (b) yellow, "
	            "(c) green, or (d) blue?\n");

	        std::unique_ptr<std::string> serialized_answer =
	            student.answerQuestion(*serialized_question);

	        if (!serialized_answer)
	        {
	            printf("Teacher: the student refused to answer the first "
	                "question.\n");
	        }
	        else
	        {
	            MultipleChoiceAnswer answer;

	            // Check the parse result instead of reading a choice out of a
	            // message that failed to parse.
	            if (!answer.ParseFromString(*serialized_answer))
	            {
	                printf("Teacher: I couldn't read the student's answer to the "
	                    "first question.\n");
	            }
	            else
	            {
	                printf("Teacher: the student answered '%c' to the first "
	                    "question.\n", indexToChar(answer.choice()));
	            }
	        }

	        printf("Teacher: now I'll ask the student a question that's not even "
	            "a real protobuf.\n");

	        // The "trick" question: an empty buffer instead of a serialized
	        // MultipleChoiceQuestion.
	        serialized_answer = student.answerQuestion("");

	        if (!serialized_answer)
	        {
	            printf("Teacher: the student refused to answer a bad question.\n");
	        }
	        else
	        {
	            printf("Teacher: the student answered the fake question!\n");
	        }
	    }
	};

	int main()
	{
	Teacher teacher;
	Student student;

	teacher.administerTest(student);
	}
	Teacher: let's begin the test.
	Teacher: what's your favorite colour: (a) red, (b) yellow, (c) green, or (d) blue?
	Student: umm, 'b'?
	Teacher: the student answered 'b' to the first question.
	Teacher: now I'll ask the student a question that's not even a real protobuf.
	[libprotobuf ERROR google/protobuf/message_lite.cc:123] Can't parse message of type "MultipleChoiceQuestion" because it is missing required fields: question
	Student: hey, that's not a real question!
	Teacher: the student refused to answer a bad question.
	import google.protobuf
	import multiplechoice_pb2

	def letterToIndex(letter):
	    """Return the offset of `letter` from 'a': 'a' -> 0, 'b' -> 1, ..."""
	    # The body is indented under the def; without that the function is a
	    # syntax error.
	    return ord(letter) - 97

	def indexToLetter(index):
	    """Return the letter at offset `index` from 'a': 0 -> 'a', 1 -> 'b', ..."""
	    # The body is indented under the def; without that the function is a
	    # syntax error.
	    return chr(index + 97)

	class Student(object):
	    """A test-taker that answers serialized MultipleChoiceQuestion messages."""

	    def answerQuestion(self, serialized_question):
	        """
	        Takes a serialized question, parses it, creates a MultipleChoiceAnswer
	        proto, and returns the serialized proto.

	        Returns None (after printing a complaint) if parsing raises
	        DecodeError.
	        """
	        # Imported explicitly: a bare `import google.protobuf` does not by
	        # itself guarantee that the `message` submodule has been loaded.
	        from google.protobuf.message import DecodeError

	        question_proto = multiplechoice_pb2.MultipleChoiceQuestion()

	        try:
	            question_proto.ParseFromString(serialized_question)
	        except DecodeError:
	            print("Student: hey, that's not a real question!")
	            return None

	        # No DecodeError was raised, so we treat the question as valid. This
	        # is deliberately the only check: an empty string gets this far even
	        # though `question` is a required field, which is the behaviour this
	        # program demonstrates. We didn't study for this test, so we're just
	        # gonna go ahead and answer 'b' and hope for the best.

	        answer_proto = multiplechoice_pb2.MultipleChoiceAnswer()
	        answer_proto.choice = letterToIndex('b')
	        print("Student: ummm, 'b'?")

	        return answer_proto.SerializeToString()

	class Teacher(object):
	    """Builds serialized questions and uses them to quiz a Student."""

	    def makeQuestion(self, question, choices, hint=None):
	        """
	        Takes a question string and a list of choices (strings), creates a
	        MultipleChoiceQuestion, and returns the serialized question.

	        `hint` is only set on the message when it is not None.
	        """
	        question_proto = multiplechoice_pb2.MultipleChoiceQuestion()
	        question_proto.question = question

	        for choice in choices:
	            question_proto.choices.append(choice)

	        if hint is not None:
	            question_proto.hint = hint

	        return question_proto.SerializeToString()

	    def administerTest(self, student):
	        """
	        Test the student to see if they can recognize an invalid (empty string)
	        protobuf.

	        First asks a real question and reports the answer, then hands the
	        student an empty string and reports whether they refused to answer.
	        """

	        # Single-argument print(...) produces the same output under the
	        # Python 2 print statement and the Python 3 print function.
	        print("Teacher: let's begin the test.")

	        serialized_question = self.makeQuestion("What's your favorite colour?",
	            ['red', 'yellow', 'green', 'blue'])

	        print("Teacher: what's your favorite colour: (a) red, (b) yellow, "
	              "(c) green, or (d) blue?")

	        serialized_answer = student.answerQuestion(serialized_question)

	        if serialized_answer is None:
	            print("Teacher: the student refused to answer the first question.")
	        else:
	            answer = multiplechoice_pb2.MultipleChoiceAnswer()
	            answer.ParseFromString(serialized_answer)

	            print("Teacher: the student answered %r to the first question."
	                  % indexToLetter(answer.choice))

	        # now we'll try to trick the student by giving them an empty string
	        # instead of a real question

	        print("Teacher: now I'll ask the student a question that's not even "
	              "a real protobuf.")

	        # b"" is the same object type and value as "" on Python 2, and is the
	        # bytes type that serialized messages have on Python 3.
	        serialized_question = b""

	        serialized_answer = student.answerQuestion(serialized_question)

	        if serialized_answer is None:
	            print("Teacher: the student refused to answer a bad question")
	        else:
	            print("Teacher: the student answered the fake question!")

	if __name__ == '__main__':
	    # Run the demonstration: a Teacher administers the test to a Student.
	    # The statements are indented under the guard; without that the block
	    # is a syntax error.
	    teacher = Teacher()
	    student = Student()

	    teacher.administerTest(student)
	Teacher: let's begin the test.
	Teacher: what's your favorite colour: (a) red, (b) yellow, (c) green, or (d) blue?
	Student: ummm, 'b'?
	Teacher: the student answered 'b' to the first question.
	Teacher: now I'll ask the student a question that's not even a real protobuf.
	Student: ummm, 'b'?
	Teacher: the student answered the fake question!