Skip to content

Instantly share code, notes, and snippets.

@prasanthmj
Last active January 29, 2024 17:50
Show Gist options
  • Save prasanthmj/ec804de3b6355c3ca26984a892ad550d to your computer and use it in GitHub Desktop.
Save prasanthmj/ec804de3b6355c3ca26984a892ad550d to your computer and use it in GitHub Desktop.
Parse and extract data from Gmail to Google Sheet. Read full article here: http://blog.gsmart.in/parse-and-extract-data-from-gmail-to-google-sheets/
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<title>Message Display Test</title>
<link rel="stylesheet" href="https://ssl.gstatic.com/docs/script/css/add-ons1.css">
</head>
<body style="padding:3em;">
<h1>Messages</h1>
<ul>
<? for(var m=0;m<messages.length;m++){ ?>
<li><?= messages[m].getSubject() ?></li>
<p><?= messages[m].getPlainBody() ?></p>
<? } ?>
</ul>
</body>
</html>
function getRelevantMessages()
{
var threads = GmailApp.search("newer_than:1d AND from:citicorp.com AND subject:Transaction confirmation AND -label:payment_processing_done",0,100);
var messages=[];
threads.forEach(function(thread)
{
messages.push(thread.getMessages()[0]);
});
return messages;
}
function parseMessageData(messages)
{
var records=[];
if(!messages)
{
//messages is undefined or null or just empty
return records;
}
for(var m=0;m<messages.length;m++)
{
var text = messages[m].getPlainBody();
var matches = text.match(/Rs\.\s+([\d\,\.]+)\s+(?:spent on card)\s+(\d+)\s+(?:on)\s+([\d\-A-Z]+)\s+(?:at)\s+([\w\s]+)\./);
if(!matches || matches.length < 5)
{
//No matches; couldn't parse continue with the next message
continue;
}
var rec = {};
rec.amount = matches[1];
rec.card = matches[2];
rec.date= matches[3];
rec.merchant = matches[4];
//cleanup data
rec.amount = parseFloat(rec.amount.replace(/,/g, ''));
records.push(rec);
}
return records;
}
function getMessagesDisplay()
{
var templ = HtmlService.createTemplateFromFile('messages');
templ.messages = getRelevantMessages();
return templ.evaluate();
}
function getParsedDataDisplay()
{
var templ = HtmlService.createTemplateFromFile('parsed');
templ.records = parseMessageData(getRelevantMessages());
return templ.evaluate();
}
function saveDataToSheet(records)
{
var spreadsheet = SpreadsheetApp.openByUrl("https://docs.google.com/spreadsheets/d/1ql-S5TjH5KoekRvRqiwuyequyxhgas/edit#gid=0");
var sheet = spreadsheet.getSheetByName("Sheet1");
for(var r=0;r<records.length;r++)
{
sheet.appendRow([records[r].date,records[r].card, records[r].merchant, records[r].amount ] );
}
}
function processTransactionEmails()
{
var messages = getRelevantMessages();
var records = parseMessageData(messages);
saveDataToSheet(records);
labelMessagesAsDone(messages);
return true;
}
function labelMessagesAsDone(messages)
{
var label = 'payment_processing_done';
var label_obj = GmailApp.getUserLabelByName(label);
if(!label_obj)
{
label_obj = GmailApp.createLabel(label);
}
for(var m =0; m < messages.length; m++ )
{
label_obj.addToThread(messages[m].getThread() );
}
}
function doGet()
{
return getParsedDataDisplay();
//return getMessagesDisplay();
}
<!DOCTYPE html>
<html>
<head>
<base target="_top">
<title>Data parsed from emails</title>
<link rel="stylesheet" href="https://ssl.gstatic.com/docs/script/css/add-ons1.css">
</head>
<body style="padding:3em;">
<h2>Parsed data from Messages</h2>
<table style="width:420px">
<thead>
<tr>
<th>Date</th><th>Merchant</th><th>Card</th><th>Amount</th>
</tr>
</thead>
<tbody>
<? for(var m=0;m<records.length;m++){ ?>
<tr>
<td><?= records[m].date ?></td>
<td><?= records[m].merchant ?></td>
<td><?= records[m].card ?></td>
<td><?= records[m].amount ?></td>
</tr>
<? }?>
</tbody>
</table>
<div class="block">
<button id="save_to_sheet" class="blue">Save Data to Sheet</button>
<span id="saving_label" style="visibility:hidden">Saving ...</span>
</div>
<script>
document.querySelector("#save_to_sheet").addEventListener("click",
function(e)
{
var self=this;
self.style.visibility = 'hidden';
var saving_label = document.querySelector("#saving_label");
saving_label.style.visibility = 'visible';
google.script.run.withSuccessHandler(function()
{
alert("Saved expense data");
self.style.visibility = 'visible';
saving_label.style.visibility = 'hidden';
}).withFailureHandler(function()
{
alert("an error occured while saving");
self.style.visibility = 'visible';
saving_label.style.visibility = 'hidden';
}).processTransactionEmails();
});
</script>
</body>
</html>
@n0rg
Copy link

n0rg commented Oct 17, 2019

Great script! One thing I noticed is in getRelevantMessages() for GmailApp.search function, regardless of what filters you put in, it will pull all threads regardless of being in the trash or not. I had to add a check isInInbox() before pushing to the message object.

@wfresch
Copy link

wfresch commented Jan 22, 2020

same. I ended up just changing the filter to grab messages newer than 1 hour, since the script is running hourly. (ie - "newer_than:1h ...")

@xMQ2RwURb
Copy link

Prasanth, thanks for this script I have a question though, how can I parse only one variable?
I cannot do it it seems. I only need the phone numbers from a specific sender.

I found the regex var matches = text.match(/Tel.: +\d+/g); which matches Tel.: 01xxxxxxxx
but I'm pulling my hair out and dont know what to modify here in order to make it work:

    if(!matches || matches.length < 5)
    {
      //No matches; couldn't parse continue with the next message
      continue;
    }
    var rec = {};
    rec.amount = matches[1];
    rec.card = matches[2];
    rec.date= matches[3];
    rec.merchant = matches[4];

Any hints please?

@Wdygrey
Copy link

Wdygrey commented Feb 10, 2020

Hi, I'm not a professional programmer, please if you could advise me..

I just tried with the first step by displaying those filtered email, but while running the web app I'm getting this msg -"Script function not found: doGet"..

pls help. thanks

@Henrixbor
Copy link

Henrixbor commented Feb 10, 2020

It as simple as the msg says, you are missing the one function that runs the page. You can fix the issue by adding:

function doGet() {
return getMessagesDisplay();
}

Which is also listed wrote in the end of the more detailed version of Prasanthmj's code.

@Wdygrey
Copy link

Wdygrey commented Feb 11, 2020

Hi Henrixbor ,

thanks for the advice for me this total program newbie..it works..will continue the rest..

@JusTelor
Copy link

hi Prasanth, can you help me?
function getRelevantMessages were running well and get the email.

but, I have an error here,
TypeError: Cannot read property 'length' of undefined at parseMessageData

@devluis
Copy link

devluis commented Apr 23, 2020

hi Prasanth, can you help me?
function getRelevantMessages were running well and get the email.

but, I have an error here,
TypeError: Cannot read property 'length' of undefined at parseMessageData

Hi, have the same error on parseMessageData function, can you help me?

@prasanthmj
Copy link
Author

hi Prasanth, can you help me?
function getRelevantMessages were running well and get the email.
but, I have an error here,
TypeError: Cannot read property 'length' of undefined at parseMessageData

Hi, have the same error on parseMessageData function, can you help me?

I have added a check in parseMessageData for messages being empty case.

@M3rcuti0
Copy link

Hi, I know this is an old thread, any help would be appreciated.

My filter is returning returning only 1 of 2 emails when both are visible and present in the inbox. threads are turned off. I stripped the filter down to the basics to try and identify the problem. I'm only filtering on emails send from another email address I own.

either the filter is not returning all results or there is a problem with the array.

Apologies. very new to this.

function getRelevantMessages()
{
  var filter = "from:myemailaddress@gmail.com";
  var threads = GmailApp.search(filter);
  var messages=[];
  
  threads.forEach(function(thread)
                  {
                    messages.push(thread.getMessages()[0]);
                  });
  return messages;
}

@prasanthmj
Copy link
Author

One thing you can do is trying with a partial match and see if it makes any difference:
For example:
var filter = "from:myemail"

Hi, I know this is an old thread, any help would be appreciated.

My filter is returning returning only 1 of 2 emails when both are visible and present in the inbox. threads are turned off. I stripped the filter down to the basics to try and identify the problem. I'm only filtering on emails send from another email address I own.

either the filter is not returning all results or there is a problem with the array.

Apologies. very new to this.

function getRelevantMessages()
{
  var filter = "from:myemailaddress@gmail.com";
  var threads = GmailApp.search(filter);
  var messages=[];
  
  threads.forEach(function(thread)
                  {
                    messages.push(thread.getMessages()[0]);
                  });
  return messages;
}

@M3rcuti0
Copy link

thanks - appreciate your quick reply.
I'm going to investigate this in the next few days when I get some time. thank you!

@M3rcuti0
Copy link

hi, apologies, it took some time to come back to this project due to my work commitments.

I have tried what you have suggested and unfortunately i am still struggling. I have >300 emails in my inbox, all from the same sender. The search is returning x5 random emails. If i add no filter I get all emails from junk + the 5 random emails. Any ideas?

@FHaslboeck
Copy link

FHaslboeck commented Sep 2, 2020

I do actually have the same problem. The code does not open threads, but only shows the first message within the thread. Even when I deactivate the thread, it does not show the rest of the messages. Can anyone help?

EDIT: I was able to solve the problem. What I did is within the getRelevantMessages after you search the threads, I tried to convert the messages-Array to a one-dimensional array. After that, all worked out fine!

var arrToConvert=[];

for(var i = threads.length - 1; i >=0; i--){

  arrToConvert.push(threads[i].getMessages());   

}
var messages = [];

for(var i = 0; i < arrToConvert.length; i++)
{
messages = messages.concat(arrToConvert[i]);
}

Logger.log(messages);

return messages;

@M3rcuti0
Copy link

M3rcuti0 commented Sep 5, 2020

thanks @FHaslboeck worked for me too

@mzakstudio
Copy link

I am trying to extract name and phone number from email like these

Feedback Details

Caller Name: Mr name
Call Date: Wed, 16 sep 2020 12:40:11
Caller Phone: +911234561785
Caller Email: mailid@gmail.com

can anyone help me?

@cakerules
Copy link

Hi! How can I get this script to display in google sheets so when I run it the information that populates in the browser instead populates to google sheets?

@aleckwalker
Copy link

aleckwalker commented Apr 15, 2022

Here's one function that bypasses the HTML bit and does the following:

  • Grabs relevant email threads
  • Grabs emails from those threads
  • Parses the relevant info from the emails
  • Puts the info into a Google Sheet
  • Avoids duplicates based on a unique identifier per email
function logSubmissions() 
{
  //Grab emails from powr.io pertaining to new submissions
  var threads = GmailApp.search("newer_than:1d AND in:inbox AND from:powr.io AND subject:A new pitch has been submitted to SA&E TC",0,100);
  //Collect messages from across threads
  var arrToConvert=[];
  for(var i = threads.length - 1; i >=0; i--) {
    arrToConvert.push(threads[i].getMessages());   
  }
  var messages = [];
  for(var i = 0; i < arrToConvert.length; i++) {
    messages = messages.concat(arrToConvert[i]);
  }
  Logger.log(threads.length);
  Logger.log(messages.length);
  //Open the spreadsheet
  var spreadsheet = SpreadsheetApp.openByUrl("https://docs.google.com/spreadsheets/d/1dAgAWIRLhbFnX2_2K6092uvtSuySxnX3VJaVK3DuzDQ/edit#gid=0");
  //For each email in the threads, parse the important bits
  for(var m=0;m<messages.length;m++)
  {
    var text = messages[m].getPlainBody();
    var first_name = text.match(/(?<=First Name )(.*)(?= )/)[0];
    var last_name = text.match(/(?<=Last Name )(.*)(?= )/)[0];
    var email_address = text.match(/(?<=Email Address )(.*)(?= )/)[0];
    var pitch_link = text.match(/(?<=Link to your pitch \(Dropbox, Google Drive, DocSend, etc\) )(.*)(?= )/)[0];
    var summary = text.match(/(?<=Two-sentence summary of your venture )(.*)(?= )/)[0];
    var phone = "'" + text.match(/(?<=Phone )(.*)(?= )/)[0];
    var how_heard = text.match(/(?<=How did you hear about us\? )(.*)(?= )/)[0];
    var us_venture = text.match(/(?<=Is your venture based in the US\? )(.*)(?= )/)[0];
    var stanford_venture = text.match(/(?<=Is your venture founded by a Stanford alumus\? )(.*)(?= )/)[0];
    var source = text.match(/(?<=Submission Source: )(.*)(?= )/)[0];
    //Check to see if the pitch link is already in there
    var sheet = spreadsheet.getSheetByName("Applications");
    var values = sheet.getDataRange().getValues();
    var in_sheet = false
    for (var i=0; i<values.length; i++) {
      for (var j=0; j<values[0].length; j++) {
        //Logger.log(values[i][j]);
        if(values[i][j] == pitch_link) {
          in_sheet = true;
        }
      }
    }
    //Add application to spreadsheet so long as pitch link not already in there
    if(in_sheet == false) {
      sheet.appendRow([first_name, last_name, email_address, pitch_link, summary, phone, how_heard, us_venture, stanford_venture, source]);
    }
  }
}

@FunkedelicBob
Copy link

Here's one function that bypasses the HTML bit and does the following:

Great work bringing it all into one function aleckwalker! Worked perfectly :)

@chaney99
Copy link

chaney99 commented Jan 13, 2023

When using GmailApp.search(filter,0, 100), I only get about 40 emails even though there are over 70.

It seems to be skipping emails that came on the same day even though they are different. The multiple emails all arrive at the same time every day but have different content and different ids.

Why is it skipping emails?

@chaney99
Copy link

chaney99 commented Jan 13, 2023

When using GmailApp.search(filter,0, 100), I only get about 40 emails even though there are over 70.

It seems to be skipping emails that came on the same day even though they are different. The multiple emails all arrive at the same time every day but have different content and different ids.

Why is it skipping emails?

I have determined it is skipping emails that are received at the same time. Because they are considered part of the same thread ( though not displayed that way in the gmail browser UI). So getRelevantMessages() should be updated with something like this :

var messages=[];
threads.forEach(function(thread)
{

                for ( i = 0; i < thread.getMessageCount() ; i++)
                {
                  // I am not getting all of the emails when they are received at the same time. 
                  // But this is also returning forwarded emails, etc. 
                  // So additional filter checks are necessary 

                  m = thread.getMessages()[i]; 
                  if ( m.isInInbox() && m.getFrom() == "foo.com" )
                    messages.push(thread.getMessages()[i]);
                }
                // messages.push(thread.getMessages()[0]);
              });

I am new to this but it feels like a google apps script bug.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment