Skip to content

Instantly share code, notes, and snippets.

@aaronanderson
Last active November 23, 2021 05:52
Show Gist options
  • Save aaronanderson/fc3a2452d554c1362f4f6d5bb4c815ea to your computer and use it in GitHub Desktop.
Save aaronanderson/fc3a2452d554c1362f4f6d5bb4c815ea to your computer and use it in GitHub Desktop.
Apache POI HSMF Outlook Calendar/Appointment Metadata extraction including recurrence settings
package outlook;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.time.format.DateTimeFormatter;
import java.util.Base64;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.MAPIMessage.MESSAGE_CLASS;
import org.apache.poi.hsmf.datatypes.ByteChunk;
import org.apache.poi.hsmf.datatypes.Chunk;
import org.apache.poi.hsmf.datatypes.MAPIProperty;
import org.apache.poi.hsmf.datatypes.NameIdChunks.PredefinedPropertySet;
import org.apache.poi.hsmf.datatypes.PropertiesChunk;
import org.apache.poi.hsmf.datatypes.PropertyValue;
import org.apache.poi.hsmf.datatypes.StringChunk;
public class OutlookMsgParser {
// Compiles with Apache POI 5.1.0.
// Drag and drop an Outlook email or calendar event into a folder, then use the Java code below to parse the
// appointment and extract metadata such as the meeting date, participants, and recurrence settings.
// Standard Outlook named properties are listed here:
// https://docs.microsoft.com/en-us/office/client-developer/outlook/mapi/mapi-constants
// Install OutlookSpy (https://www.dimastr.com/outspy/home.htm) to view the contents of .msg files.
// The tool helps visualize which properties are available and commonly populated in Outlook .msg files.
/**
 * Parses the Outlook {@code .msg} file at a fixed path and prints its metadata to standard output.
 *
 * <p>For every message the hashed message id (SHA-256, Base64 encoded), the date and the subject are
 * printed when available. Appointments additionally get their named appointment properties printed,
 * including the decoded recurrence blob; all other message classes get the From/To/CC display names.
 * Any exception is reported with a stack trace and ends the dump.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String emailPath = "/tmp/my_outlook_appointment.msg";
    try (MAPIMessage msg = new MAPIMessage(emailPath)) {
        // The getters below are null-checked, so ask POI to return null for a missing chunk
        // instead of throwing ChunkNotFoundException and aborting the whole dump.
        msg.setReturnNullOnMissingChunk(true);

        String mid = null;
        if (msg.getMainChunks().getMessageId() != null) {
            mid = msg.getMainChunks().getMessageId().getValue();
        } else if (msg.getMainChunks().getSubmissionChunk() != null) {
            mid = msg.getMainChunks().getSubmissionChunk().getSubmissionId();
        }
        if (mid != null) {
            // Digest the message id to create a more compact unique identifier. The charset is
            // pinned so the digest does not depend on the platform default encoding.
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            byte[] digest = md.digest(mid.getBytes(StandardCharsets.UTF_8));
            System.out.format("Message ID: %s\n", Base64.getEncoder().encodeToString(digest));
        }
        Optional.ofNullable(msg.getMessageDate()).ifPresent(c -> System.out.format("Date: %s\n", formatDate((GregorianCalendar) c)));
        Optional.ofNullable(msg.getSubject()).ifPresent(s -> System.out.format("Subject: %s\n", s));
        if (msg.getMessageClassEnum() == MESSAGE_CLASS.APPOINTMENT) {
            // HSMFDump dump = new HSMFDump(new POIFSFileSystem(new File(emailPath)));
            // dump.dump(System.out);
            printAppointment(msg);
        } else {
            Optional.ofNullable(msg.getDisplayFrom()).ifPresent(c -> System.out.format("From: %s\n", c));
            Optional.ofNullable(msg.getDisplayTo()).ifPresent(c -> System.out.format("To: %s\n", c));
            Optional.ofNullable(msg.getDisplayCC()).ifPresent(c -> System.out.format("CC: %s\n", c));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Prints the appointment-specific named properties of {@code msg}.
 *
 * <p>These properties use name ID redirection. Use OutlookSpy to find properties of interest in a
 * .msg file: look up the Named Property section and confirm the GUID and ID, then look up the actual
 * property tag number in the name ID chunk. The tag refers to a property or chunk ID in the file.
 */
private static void printAppointment(MAPIMessage msg) {
    int allAttendeesTag = appointmentTag(msg, "AllAttendeesString", 0x8238);
    int locationTag = appointmentTag(msg, "Location", 0x8208);
    int appointmentStartTag = appointmentTag(msg, "AppointmentStartWhole", 0x820D);
    int appointmentEndTag = appointmentTag(msg, "AppointmentEndWhole", 0x820E);
    int clipStartTag = appointmentTag(msg, "ClipStart", 0x8235);
    int clipEndTag = appointmentTag(msg, "ClipEnd", 0x8236);
    int recurringTag = appointmentTag(msg, "Recurring", 0x8223);
    int recurrenceTypeTag = appointmentTag(msg, "RecurrenceType", 0x8231);
    int appointmentRecurTag = appointmentTag(msg, "AppointmentRecur", 0x8216);
    int appointmentDurationTag = appointmentTag(msg, "AppointmentDuration", 0x8213);

    Set<Integer> propertyTags = new HashSet<>();
    propertyTags.add(allAttendeesTag);
    propertyTags.add(locationTag);
    propertyTags.add(appointmentStartTag);
    propertyTags.add(appointmentEndTag);
    propertyTags.add(clipStartTag);
    propertyTags.add(clipEndTag);
    propertyTags.add(recurringTag);
    propertyTags.add(recurrenceTypeTag);
    propertyTags.add(appointmentRecurTag);
    propertyTags.add(appointmentDurationTag);
    Map<Integer, Object> props = extendedProperties(propertyTags, msg);

    printIfPresent("All Attendees: %s\n", props.get(allAttendeesTag));
    printIfPresent("Location: %s\n", props.get(locationTag));
    printDateIfPresent("Appointment Start: %s\n", props.get(appointmentStartTag));
    printDateIfPresent("Appointment End: %s\n", props.get(appointmentEndTag));
    printDateIfPresent("Clip StartTag: %s\n", props.get(clipStartTag));
    printDateIfPresent("Clip EndTag: %s\n", props.get(clipEndTag));
    printIfPresent("Recurring: %s\n", props.get(recurringTag));

    Object recurrenceType = props.get(recurrenceTypeTag);
    if (recurrenceType != null) {
        System.out.format("Recurrence Type: %s - %s\n", recurrenceType, RecurrenceType.recurrenceTypeOf((Integer) recurrenceType));
    }

    Object recur = props.get(appointmentRecurTag);
    if (recur instanceof ByteChunk) {
        // NOTE(review): the blob may arrive still wrapped in its chunk; unwrap before decoding.
        recur = ((ByteChunk) recur).getValue();
    }
    if (recur instanceof byte[]) {
        printRecurrencePattern((byte[]) recur);
    }

    printIfPresent("Appointment Duration: %s\n", props.get(appointmentDurationTag));
}

/** Resolves a named property of the PSETID_APPOINTMENT property set to its property tag in {@code msg}. */
private static int appointmentTag(MAPIMessage msg, String name, int id) {
    return (int) msg.getNameIdChunks().getPropertyTag(PredefinedPropertySet.PSETID_APPOINTMENT.getClassID(), name, id);
}

/** Prints {@code value} with the given single-argument format string unless it is {@code null}. */
private static void printIfPresent(String format, Object value) {
    if (value != null) {
        System.out.format(format, value);
    }
}

/** Prints a {@link GregorianCalendar} value via {@code formatDate} unless it is {@code null}. */
private static void printDateIfPresent(String format, Object value) {
    if (value != null) {
        System.out.format(format, formatDate((GregorianCalendar) value));
    }
}

/**
 * Decodes the leading fields of an AppointmentRecur blob and prints frequency, pattern type, period
 * and the pattern specific value.
 *
 * <p>See https://interoperability.blob.core.windows.net/files/MS-OXOCAL/%5BMS-OXOCAL%5D.pdf
 * (pages 34 and 113) for the recurrence blob format. Only the fields that are printed are kept;
 * the others are read solely to advance the buffer.
 */
private static void printRecurrencePattern(byte[] blobBytes) {
    // Five 2-byte fields (ReaderVersion .. CalendarType) followed by three 4-byte fields
    // (FirstDateTime, Period, SlidingFlag) precede the pattern specific data.
    final int fixedHeaderLength = 5 * Short.BYTES + 3 * Integer.BYTES;
    if (blobBytes.length < fixedHeaderLength) {
        System.err.format("Appointment Recur - blob too short to decode: %d bytes\n", blobBytes.length);
        return;
    }
    ByteBuffer blob = ByteBuffer.wrap(blobBytes).order(ByteOrder.LITTLE_ENDIAN);
    blob.getShort(); // ReaderVersion
    blob.getShort(); // WriterVersion
    // Mask to the unsigned 16-bit value so hex output and enum lookup never see a sign-extended int.
    int recurFrequency = blob.getShort() & 0xFFFF;
    int patternType = blob.getShort() & 0xFFFF;
    blob.getShort(); // CalendarType
    blob.getInt(); // FirstDateTime
    int period = blob.getInt();
    blob.getInt(); // SlidingFlag

    PatternType pattern = PatternType.patternOf(patternType);
    // Per MS-OXOCAL a Day pattern carries no PatternTypeSpecific data; for the other patterns the
    // first four bytes are the value rendered by formatPatternTypeSpecific.
    int patternTypeSpecific = 0;
    if (pattern != PatternType.Day && blob.remaining() >= Integer.BYTES) {
        patternTypeSpecific = blob.getInt();
    }

    System.out.format("Appointment Recur - RecurFrequency: %s - %s\n", Integer.toHexString(recurFrequency), RecurFrequency.frequencyOf(recurFrequency));
    System.out.format("Appointment Recur - PatternType: %s - %s\n", Integer.toHexString(patternType), pattern);
    System.out.format("Appointment Recur - Period: %s\n", Integer.toHexString(period));
    System.out.format("Appointment Recur - PatternTypeSpecific: %s\n", formatPatternTypeSpecific(pattern, patternTypeSpecific));
}
/** Values of the appointment RecurrenceType property, each paired with its numeric code. */
public enum RecurrenceType {
    /** No recurrence (single appointment). */
    rectypeNone(0x00000000),
    rectypeDaily(0x00000001),
    rectypeWeekly(0x00000002),
    rectypeMonthly(0x00000003),
    rectypeYearly(0x00000004);

    /** Code-to-constant index; the first constant declared for a code wins. */
    private static final Map<Integer, RecurrenceType> BY_CODE = new HashMap<>();

    static {
        for (RecurrenceType type : values()) {
            BY_CODE.putIfAbsent(type.recurrenceType, type);
        }
    }

    private final int recurrenceType;

    RecurrenceType(int value) {
        recurrenceType = value;
    }

    /** Returns the numeric code of this constant. */
    int recurrenceType() {
        return recurrenceType;
    }

    /**
     * Looks up the constant for a numeric code.
     *
     * @param recurrenceType numeric code to resolve
     * @return the matching constant, or {@code null} when the code is unknown
     */
    public static RecurrenceType recurrenceTypeOf(int recurrenceType) {
        return BY_CODE.get(recurrenceType);
    }
}
/** RecurFrequency values found in the recurrence blob, each paired with its numeric code. */
public enum RecurFrequency {
    Daily(0x200A),
    Weekly(0x200B),
    Monthly(0x200C),
    Yearly(0x200D);

    /** Code-to-constant index; the first constant declared for a code wins. */
    private static final Map<Integer, RecurFrequency> BY_CODE = new HashMap<>();

    static {
        for (RecurFrequency candidate : values()) {
            BY_CODE.putIfAbsent(candidate.frequency, candidate);
        }
    }

    private final int frequency;

    RecurFrequency(int value) {
        frequency = value;
    }

    /** Returns the numeric code of this constant. */
    int frequency() {
        return frequency;
    }

    /**
     * Looks up the constant for a numeric code.
     *
     * @param frequency numeric code to resolve
     * @return the matching constant, or {@code null} when the code is unknown
     */
    public static RecurFrequency frequencyOf(int frequency) {
        return BY_CODE.get(frequency);
    }
}
/** PatternType values found in the recurrence blob, each paired with its numeric code. */
public enum PatternType {
    Day(0x0000),
    Week(0x0001),
    Month(0x0002),
    MonthEnd(0x0004),
    MonthNth(0x0003);

    /** Code-to-constant index; the first constant declared for a code wins. */
    private static final Map<Integer, PatternType> BY_CODE = new HashMap<>();

    static {
        for (PatternType candidate : values()) {
            BY_CODE.putIfAbsent(candidate.pattern, candidate);
        }
    }

    private final int pattern;

    PatternType(int value) {
        pattern = value;
    }

    /** Returns the numeric code of this constant. */
    int pattern() {
        return pattern;
    }

    /**
     * Looks up the constant for a numeric code.
     *
     * @param pattern numeric code to resolve
     * @return the matching constant, or {@code null} when the code is unknown
     */
    public static PatternType patternOf(int pattern) {
        return BY_CODE.get(pattern);
    }
}
/**
 * Renders the pattern specific value of a recurrence pattern as text.
 *
 * <p>For {@code Week} and {@code MonthNth} patterns the value is treated as a weekday bit mask
 * (bit 0 = Sunday through bit 6 = Saturday) and the selected day names are joined with ", ".
 * For {@code Month} patterns the value is rendered as a decimal number. Every other pattern,
 * including {@code null}, yields an empty string.
 *
 * @param patternType         pattern the value belongs to; may be {@code null}
 * @param patternTypeSpecific raw pattern specific value read from the recurrence blob
 * @return the textual form, never {@code null}
 */
public static String formatPatternTypeSpecific(PatternType patternType, int patternTypeSpecific) {
    StringBuilder sb = new StringBuilder();
    if (patternType == PatternType.Week || patternType == PatternType.MonthNth) {
        // Index i corresponds to mask bit i, i.e. 1-based position i + 1.
        String[] dayNames = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday" };
        for (int i = 0; i < dayNames.length; i++) {
            checkWeekDayMask(patternTypeSpecific, i + 1, dayNames[i], sb);
        }
    } else if (patternType == PatternType.Month) {
        sb.append(patternTypeSpecific);
    }
    return sb.toString();
}
/**
 * Appends {@code day} to {@code buffer} when the bit at the given 1-based {@code position} is set
 * in {@code intRepresentation}; a ", " separator is written first if the buffer already has content.
 */
private static void checkWeekDayMask(int intRepresentation, int position, String day, StringBuilder buffer) {
    int bit = (intRepresentation >>> (position - 1)) & 1;
    if (bit == 0) {
        return;
    }
    if (buffer.length() != 0) {
        buffer.append(", ");
    }
    buffer.append(day);
}
/**
 * Collects the values of the requested property tags from the main chunks of a message.
 *
 * <p>Values are taken from two places: the raw entries of any {@link PropertiesChunk} and standalone
 * {@link StringChunk}/{@link ByteChunk} chunks whose chunk id is one of the requested tags. When a
 * raw property value is itself a string or byte chunk it is unwrapped to the chunk's value, so a tag
 * maps to the same kind of object (a {@code String} or {@code byte[]}) whichever source is visited
 * last.
 *
 * @param propertTags property tags (property / chunk ids) to collect
 * @param msg         message whose main chunks are scanned
 * @return map from tag to value; tags without a value in the message are absent or map to {@code null}
 */
public static Map<Integer, Object> extendedProperties(Set<Integer> propertTags, MAPIMessage msg) {
    Map<Integer, Object> mappings = new HashMap<>();
    for (Chunk chunk : msg.getMainChunks().getChunks()) {
        if (chunk instanceof PropertiesChunk) {
            PropertiesChunk props = (PropertiesChunk) chunk;
            for (Entry<MAPIProperty, PropertyValue> prop : props.getRawProperties().entrySet()) {
                int id = prop.getKey().id;
                if (!propertTags.contains(id)) {
                    continue;
                }
                // NOTE(review): POI appears to back variable-length properties with the chunk that
                // holds their data, so unwrap it rather than handing the Chunk object to callers.
                Object value = unwrapChunkValue(prop.getValue().getValue());
                if (value != null) {
                    mappings.put(id, value);
                }
            }
        } else if (propertTags.contains(chunk.getChunkId())
                && (chunk instanceof StringChunk || chunk instanceof ByteChunk)) {
            mappings.put(chunk.getChunkId(), unwrapChunkValue(chunk));
        }
    }
    return mappings;
}

/** Returns the payload of a string or byte chunk; any other object is returned unchanged. */
private static Object unwrapChunkValue(Object value) {
    if (value instanceof StringChunk) {
        return ((StringChunk) value).getValue();
    }
    if (value instanceof ByteChunk) {
        return ((ByteChunk) value).getValue();
    }
    return value;
}
/**
 * Formats a calendar value as an ISO-8601 date-time with offset, e.g. {@code 2021-11-23T05:52:00Z}.
 *
 * @param gc calendar to format; must not be {@code null}
 * @return the ISO offset date-time text for the calendar's instant in the calendar's own time zone
 */
public static String formatDate(GregorianCalendar gc) {
    DateTimeFormatter isoWithOffset = DateTimeFormatter.ISO_OFFSET_DATE_TIME;
    return gc.toZonedDateTime().toOffsetDateTime().format(isoWithOffset);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment