Create a gist now

Instantly share code, notes, and snippets.

@jckantor /stock.m
Last active Apr 19, 2017

What would you like to do?
A Matlab class for obtaining quotes from Yahoo Finance.
% STOCK A Matlab class for obtaining quotes from Yahoo Finance.
%
% SYNOPSIS
%
% stock(symbol,speriod,sfreq)
% Creates an object for the current quote and historical prices for
% the stock denoted by SYMBOL.
%
% symbol String or cell array of strings denoting desired stocks.
%
% speriod String in the format ddd[dwmy] (digits followed by a unit:
% d = days, w = weeks, m = months, y = years) denoting the historical
% sample period. Default is '3y'.
%
% sfreq String that is either 'm', 'w', or 'd' denoting
% historical sample frequency. Default is 'd'.
%
% USAGE
%
% X = stock('F')
% Creates an object with the current quote and three years of
% daily price data for Ford.
%
% X = stock('xom','10y','m')
% Creates an object with the current quote and ten years of
% monthly historical price information for Exxon Mobil.
%
% X = stock({'uso','ung','usl'})
% Creates an object array containing data for three ETFs.
%
% METHODS
%
% stock('f').plot
% Creates and plots the historical price data.
%
% stock('f').garch
% Creates and fits a GARCH model to the historical price data.
% Requires the econometrics toolbox.
%
% stock.lookup('ford')
% A static method to look up stock symbols for companies using
% 'ford' as the search string.
%
% stock.demo
% Demonstrates use of the class
%
% Yahoo finance provides stock quotes through a url mechanism
% involving various URL parameters. For more options, consult
%
% http://www.etraderzone.com/free-scripts/50-yahoo-stock-quotes.html
% http://www.gummy-stuff.org/Yahoo-data.htm
% http://www.goldb.org/ystockquote.html
% http://finance.yahoo.com/exchanges
%
% Additional sources of techniques and background information
%
% http://tradingwithmatlab.blogspot.com/2008/06/estimate-historical-volatility.html
% http://jarloo.com/tutorials/get-yahoo-finance-api-data-via-yql/
% Jeffrey Kantor
% 12/10/2010 Added demonstrations of typical use cases.
% 12/08/2010 Added an intersect method to provide a common set of dates
% for stock objects stored in an object array.
% 11/27/2010 Historical Volatility estimated with Yang Zhang O-H-L-C
% estimator.
% 11/21/2010 Added a static stock symbol lookup method
% 11/20/2010 Added disp method and additional fields from yahoo finance
% 11/18/2010 First posted on Matlab Central
% Originally developed between 2008 and 2010
classdef (CaseInsensitiveProperties = true) stock < handle
properties
% Stored (non-dependent) state of a stock object. The descriptive and
% current-quote fields are written by getQuote; the historical vectors
% are written by getHistory and re-aligned by select.
% Descriptive Information
Symbol = ''; % Ticker symbol (String); set by the constructor, then
% overwritten with the symbol field returned in the quote
Name = ''; % Descriptive name (String)
Source = ''; % Source (String); getQuote sets it to 'Yahoo Finance'
Exchange = ''; % Exchange (String)
% Historical Data (ascending dates, i.e., most recent data is last)
hdates = []; % Historical dates in Matlab serial number format
% in ascending order. Backing store for the dependent
% property Dates.
Price = []; % Historical price data adjusted for splits,
% dividends, settlements, or other factors as
% needed for longitudinal analysis.
Open = []; % Vector of historical opening prices
High = []; % Vector of historical high prices
Low = []; % Vector of historical low prices
Close = []; % Vector of historical close prices
Volume = []; % Vector of trading volumes
AdjClose = []; % Vector of adjusted closing prices. This is
% also stored in the Price field
% Current Quote. Each value is whatever parse_csv produced for the
% corresponding field of the quote record: a double when the text
% converts to a number, otherwise a string.
last_price % Yahoo l1: Last Price
last_date % Yahoo d1: Last Trade Date
last_time % Yahoo t1: Last Trade Time
day_change % Yahoo c1: Change
prev_close % Yahoo p: Previous Close
day_open % Yahoo o: Open
day_high % Yahoo h: Day's High
day_low % Yahoo g: Day's Low
day_volume % Yahoo v: Volume
pe % Yahoo r: Price/Earnings Ratio
peg % Yahoo r5: Price to Earnings Growth
div_yield % Yahoo y: Dividend Yield [%]
year_low % Yahoo j: 52-Week Low
year_high % Yahoo k: 52-Week High
end
properties (Dependent = true)
% Values computed on access by the get.* methods of this class.
Dates % Vector of Dates; reads and writes the hdates property
Freq % Mean spacing, in days, between successive samples that
% have a non-NaN Price. Samples/year is 365.25/Freq:
% Daily approx. 252/year
% Weekly approx. 52.2/year
% Monthly approx. 12/year
Period % Length of historical record in days (latest minus
% earliest date with a non-NaN Price)
Length % Length of historical record in samples.
LogReturns % Log returns on historical prices. *Not annualized*
MeanLogReturns % Mean annualized Log return on historical prices.
Returns % Returns on historical prices. *Not annualized*
MeanReturns % Mean *annualized* returns on historical prices.
% NOTE(review): no get.MeanReturns method is defined in
% this class, so this property looks unimplemented -
% confirm before relying on it.
Volatility % Historical Volatility (annualized); returns VolatilityYZ
VolatilityCC % Close-to-Close historical volatility (annualized)
VolatilityYZ % Yang Zhang estimate of historical volatility (annualized)
end
methods
function q = stock(Symbol,sPeriod,sFreq)
    % STOCK Construct a stock object (or object array) and load its data.
    %
    %   q = stock(Symbol)
    %   q = stock(Symbol,sPeriod)
    %   q = stock(Symbol,sPeriod,sFreq)
    %
    %   Symbol   Char array holding one ticker symbol, or a cell array of
    %            strings; a cell array yields one object per symbol.
    %   sPeriod  Optional period string: one or more digits followed by
    %            d, w, m or y (e.g. '90d', '10y'). Default is '3y'.
    %   sFreq    Optional sample frequency 'd', 'w' or 'm'. Default 'd'.
    %
    %   For every element the current quote and the price history are
    %   downloaded, then the histories are reduced to their common dates.
    %
    %   Raises an error when Symbol is neither a string nor a cell array
    %   of strings, or when sFreq or sPeriod is malformed.

    % Provide for calling with no arguments. This is used by Matlab
    % to initialize object arrays.
    if nargin == 0
        return
    end

    % Store the symbol(s); growing q element by element creates the
    % object array.
    if ischar(Symbol)
        q.Symbol = Symbol;
    elseif iscellstr(Symbol)
        for n = 1:length(Symbol)
            q(n).Symbol = Symbol{n};
        end
    else
        error('Symbol is not a cell array of symbols');
    end

    % Validate and store date/data frequency. Default is 'd'
    if (nargin >= 3) && ~isempty(char(sFreq))
        sFreq = lower(strtrim(sFreq));
        if ~ismember(sFreq,{'d','m','w'})
            error('Frequency must be ''d'',''m'', or ''w''');
        end
    else
        sFreq = 'd';
    end

    % Validate the Period string. At least one digit is required: a bare
    % unit such as 'y' has no count and cannot define a start date.
    if (nargin < 2) || isempty(char(sPeriod))
        sPeriod = '3y';
    else
        sPeriod = lower(strtrim(sPeriod));
        if isempty(regexp(sPeriod,'^\d+[dwmy]$','once'))
            error('Invalid Period Specification');
        end
    end

    % Get Data
    nStocks = length(q);
    for n = 1:nStocks
        % Show progress when many symbols are being downloaded
        if nStocks >= 5
            disp(q(n).Symbol);
        end
        getQuote(q(n));
        getHistory(q(n),sPeriod,sFreq);
    end

    % Keep only the dates shared by every element of the array
    intersect(q);
end % stock
function getQuote(q)
    % GETQUOTE Download the current quote for q.Symbol and store it in q.
    %
    %   Requests a single CSV record from Yahoo Finance, parses it with
    %   parse_csv, and copies the 17 returned fields into the descriptive
    %   and current-quote properties of q. Source is set to
    %   'Yahoo Finance'.
    %
    %   Raises an error when the URL cannot be read or when the record
    %   holds fewer fields than were requested.

    % Create URL and read response from Yahoo Finance
    [str,status] = urlread( ...
        ['http://finance.yahoo.com/d/quotes.csv?', ...
        sprintf('s=%s',char(q.Symbol)),'&f=snxl1d1t1c1pohgvrr5yjk']);
    if ~status
        error('Unable to read quote data from Yahoo Finance.');
    end
    s = parse_csv(str);

    % Target property for each requested tag, in the order of the tags
    % in the f= parameter above.
    fields = { ...
        'Symbol', ...      % tag s:  Symbol
        'Name', ...        % tag n:  Name
        'Exchange', ...    % tag x:  Exchange
        'last_price', ...  % tag l1: Price of last trade
        'last_date', ...   % tag d1: Date of last trade
        'last_time', ...   % tag t1: Time of last trade
        'day_change', ...  % tag c1: Day change
        'prev_close', ...  % tag p:  Previous Close
        'day_open', ...    % tag o:  Day open
        'day_high', ...    % tag h:  Day high
        'day_low', ...     % tag g:  Day low
        'day_volume', ...  % tag v:  Day volume
        'pe', ...          % tag r:  Price/Earnings
        'peg', ...         % tag r5: Price/Earnings Growth
        'div_yield', ...   % tag y:  Dividend Yield
        'year_low', ...    % tag j:  52-Week Low
        'year_high'};      % tag k:  52-Week High

    % Fail with a clear message rather than an index error when the
    % response is truncated or is not the expected record.
    if numel(s) < numel(fields)
        error('Unexpected quote format from Yahoo Finance: expected %d fields, received %d.', ...
            numel(fields),numel(s));
    end

    q.Source = 'Yahoo Finance';
    for k = 1:numel(fields)
        q.(fields{k}) = s{k};
    end
end % getQuote
function getHistory(q, sPeriod, sFreq)
    % GETHISTORY Download historical prices for q.Symbol and store them.
    %
    %   getHistory(q,sPeriod,sFreq)
    %
    %   sPeriod  Period string: one or more digits followed by d, w, m
    %            or y. The history starts that long before today.
    %   sFreq    Sample frequency: 'd', 'w' or 'm'.
    %
    %   On success Dates, Open, High, Low, Close, Volume, AdjClose and
    %   Price are replaced, oldest sample first. When the download fails
    %   a warning is issued and q is left unchanged.
    %
    %   Raises an error when fewer than three arguments are given or when
    %   sFreq or sPeriod is malformed.

    % Parse arguments. This function may be used to update the
    % history of an existing object, so need to validate input
    % arguments.
    if nargin < 3
        error('getHistory requires three arguments');
    end
    sFreq = lower(strtrim(sFreq));
    if ~ismember(sFreq,{'d','m','w'})
        error('Frequency must be ''d'',''m'', or ''w''');
    end

    % At least one digit is required; without a count the start date
    % would evaluate to NaN.
    sPeriod = lower(strtrim(sPeriod));
    tok = regexp(sPeriod,'^(\d+)([dwmy])$','tokens','once');
    if isempty(tok)
        error('Invalid Period Specification');
    end

    % Calendar days represented by one unit of the period
    switch tok{2}
        case 'd'
            daysPerUnit = 1;
        case 'w'
            daysPerUnit = 7;
        case 'm'
            daysPerUnit = 365.25/12;
        case 'y'
            daysPerUnit = 365.25;
    end
    startDate = datenum(date) - round(daysPerUnit*str2double(tok{1}));
    [startYear,startMonth,startDay] = datevec(startDate);

    % Construct Yahoo url
    urlstr = ['http://ichart.finance.yahoo.com/table.csv?',...
        '&s=', q.Symbol, ...
        '&a=', num2str(startMonth-1), ... % Start Month-1
        '&b=', num2str(startDay),...      % Start Day
        '&c=', num2str(startYear), ...    % Start Year
        '&g=', sFreq];                    % Frequency (d, w or m)

    % Read url and parse into a cell array of individual lines
    [str,status] = urlread(urlstr);
    if ~status
        % Best effort: keep the object usable, but say what happened
        warning('stock:getHistory:readFailed', ...
            'Unable to read price history for %s from Yahoo Finance.', ...
            char(q.Symbol));
        return
    end
    rows = textscan(str,'%s','delimiter','\n');
    rows = rows{1};

    % Skip the first line, then parse each line into fields
    nRows = length(rows) - 1;
    h = zeros(nRows,7);
    for k = 1:nRows
        t = textscan(rows{k+1},'%s%f%f%f%f%f%f','delimiter',',');
        t{1} = datenum(t{1});
        h(k,:) = cell2mat(t);
    end

    % Reverse order so oldest data at the top of the columns
    h = h(end:-1:1,:);
    q.Dates    = h(:,1);
    q.Open     = h(:,2);
    q.High     = h(:,3);
    q.Low      = h(:,4);
    q.Close    = h(:,5);
    q.Volume   = h(:,6);
    q.AdjClose = h(:,7);

    % Put Yahoo Adjusted Close in the "Price" field for subsequent
    % analysis and model fitting
    q.Price = q.AdjClose;
end % getHistory
function plot(q,vargin)
    % PLOT Chart price, log returns and volume for stock objects.
    %
    %   For an object array, each element is drawn in its own figure,
    %   numbered by its position in the array. For a single object the
    %   current figure is split into three panels: adjusted close on a
    %   log scale, log returns, and volume.
    %
    %   The second argument is accepted but not used.
    count = length(q(:));
    if count > 1
        for k = 1:count
            figure(k);
            plot(q(k));
        end
        return
    end

    % Upper half: adjusted close, logarithmic price axis
    subplot(4,1,1:2)
    semilogy(q.Dates,q.Price);
    heading = sprintf('%s: %s',q.Exchange,q.Name);
    title(heading);
    ylabel('Adjusted Close');
    datetick('x',12);
    % Round the tick labels to whole cents
    ticks = get(gca,'YTick');
    set(gca,'YTick',round(100*ticks)/100);
    grid;

    % Third quarter: log returns, titled with the volatility estimate
    subplot(4,1,3)
    plot(q.Dates,q.LogReturns);
    heading = sprintf('Historical Volatility = %6.3f',q.Volatility);
    title(heading);
    ylabel('Log Returns');
    datetick('x',12);
    grid;

    % Bottom quarter: trading volume
    subplot(4,1,4)
    plot(q.Dates,q.Volume);
    ylabel('Volume');
    datetick('x',12);
    grid;
end % plot
function set.Dates(q,dates)
% Setter for the dependent Dates property: stores the supplied dates in
% the hdates property, which get.Dates reads back.
q.hdates = dates;
end
function dates = get.Dates(q)
    % Getter for the dependent Dates property.
    %
    %   Returns the stored hdates vector. If q holds more than one
    %   element, the union of all elements' hdates is returned; an empty
    %   q yields [].
    dates = [];
    if isempty(q)
        return;
    end
    dates = q(1).hdates;
    for n = 2:length(q(:))
        % Semicolon matters: without it every access prints the vector
        dates = union(dates,q(n).hdates);
    end
end
function r = get.Freq(q)
    % Mean spacing, in days, between consecutive dates whose Price is
    % not NaN.
    hasPrice = ~isnan(q.Price);
    validDates = q.Dates;
    validDates = validDates(hasPrice);
    spacing = diff(validDates);
    r = mean(spacing);
end
function r = get.Period(q)
    % Span, in days, between the earliest and latest dates whose Price
    % is not NaN.
    hasPrice = ~isnan(q.Price);
    validDates = q.Dates;
    validDates = validDates(hasPrice);
    r = max(validDates) - min(validDates);
end
function r = get.Length(q)
% Number of samples in the historical record, taken as the length of the
% Dates vector.
r = length(q.Dates);
end % get.Length
function r = get.LogReturns(q)
    % Log returns of the Price data, one row per sample.
    %
    %   The first row is zero so the result lines up with Dates. A
    %   single-sample history yields NaN; an empty history yields [].
    prices = q.Price;
    [nRows,nCols] = size(prices);
    if nRows == 0
        r = [];
    elseif nRows == 1
        r = NaN(1,nCols);
    else
        logPrices = log(prices);
        r = [zeros(1,nCols);diff(logPrices)];
    end
end % get.LogReturns
function r = get.MeanLogReturns(q)
    % Annualized mean log return.
    %
    %   Averages the per-sample log returns, then scales by the number
    %   of samples per year (365.25 divided by the mean sample spacing
    %   in days reported by Freq).
    perSample = mean(q.LogReturns);
    r = perSample*(365.25/q.Freq);
end % get.MeanLogReturns
function r = get.Returns(q)
    % Simple (arithmetic) returns of the Price data, one row per sample.
    %
    %   r(t) = (P(t) - P(t-1)) / P(t-1), i.e. the change relative to the
    %   previous sample's price. The first row is zero so the result
    %   lines up with Dates. A single-sample history yields NaN; an empty
    %   history yields [].
    p = q.Price;
    [n,m] = size(p);
    if n > 1
        % Divide by the earlier price, not the later one
        r = [zeros(1,m);diff(p)./p(1:end-1,:)];
    elseif n == 1
        r = NaN(1,m);
    else
        r = [];
    end
end
function r = get.Volatility(q)
% Annualized historical volatility. This getter does no computation of
% its own: it returns the value of the VolatilityYZ property.
r = q.VolatilityYZ;
end % get.Volatility
function r = get.VolatilityCC(q)
    % Close-to-close estimate of annualized historical volatility.
    %
    %   Takes the standard deviation of the log returns of the adjusted
    %   close (the Price field), ignoring NaN entries, and rescales it to
    %   an annual basis with the square root of the samples per year
    %   (365.25 divided by the mean sample spacing in days).
    y = q.LogReturns;
    y = y(~isnan(y));
    % Use the filtered returns; NaN entries would make std return NaN
    r = std(y)*sqrt(365.25/q.Freq);
end % get.VolatilityCC
function r = get.VolatilityYZ(q)
    % Yang Zhang estimate of annualized historical volatility using
    % Open-High-Low-Close prices.
    %
    %   variance = so^2 + k*sc^2 + (1-k)*vrs, where
    %     so   std of overnight log returns, log(open(t)/close(t-1))
    %     sc   std of open-to-close log returns, log(close(t)/open(t))
    %     vrs  Rogers-Satchell variance term
    %     k    0.34/(1.34 + (n+1)/(n-1)) for n samples
    %   The result is rescaled to an annual basis with the square root
    %   of the samples per year (365.25/Freq).
    %
    % http://www.sitmo.com/eq/417
    % http://tradingwithmatlab.blogspot.com/2008/06/estimate-historical-volatility.html
    n = q.Length;
    % Weighting constant from Yang & Zhang; the denominator is
    % 1.34 + (n+1)/(n-1)
    k = 0.34/(1.34+(n+1)/(n-1));

    % Rescale historical open and close for price splits, etc. Only the
    % overnight term compares prices from different days, so it is the
    % only one that needs the adjustment; the same-day ratios below are
    % unchanged by a per-day scale factor.
    sf = q.AdjClose./q.Close;
    op = sf.*q.Open;  % Adjusted open
    cl = sf.*q.Close; % Adjusted close
    so = std(log(op(2:end)./cl(1:end-1)));
    sc = std(log(q.Close./q.Open));
    vrs = sum(log(q.High./q.Close).*log(q.High./q.Open) + ...
        log(q.Low./q.Close).*log(q.Low./q.Open))/n;
    r = sqrt(so^2 + k*sc^2 + (1-k)*vrs)*sqrt(365.25/q.Freq);
end % get.VolatilityYZ
function disp(q)
    % DISP Command-window display for stock objects.
    %
    %   A single object gets a detailed multi-line report: quote, ranges,
    %   ratios, and a summary of the price history. An object array gets
    %   a header line followed by one numbered summary row per element.
    %   An empty array prints nothing.
    if isempty(q)
        return
    end

    if isscalar(q)
        % Detailed report for one object
        rule = '-----------------------------------------------\n';
        money = @(x) num2str(x,'%6.2f');
        fprintf(rule);
        fprintf('%-17s (%s:%s)\n',q.Name,q.Exchange,q.Symbol);
        fprintf(rule);
        fprintf('%-19s %6.2f','Last Trade:',q.last_price);
        fprintf(' (%s %s)\n',q.last_time,q.last_date);
        pctChange = 100*q.day_change/q.prev_close;
        fprintf('%-19s %6.2f (%4.2f%%)\n','Daily Change:', ...
            q.day_change,pctChange);
        fprintf('%-19s %6s\n','Prev. Close:',money(q.prev_close));
        fprintf('%-19s %6s\n','Day Open:',money(q.day_open));
        fprintf('%-19s %6s - %6s\n','Day Range:', ...
            money(q.day_low),money(q.day_high));
        fprintf('%-19s %6s - %6s\n','52wk Range:', ...
            money(q.year_low),money(q.year_high));
        fprintf('%-19s %6s\n','P/E:',money(q.pe));
        fprintf('%-19s %6s %%\n','Dividend Yield:',money(q.div_yield));
        firstDay = datestr(min(q.Dates));
        lastDay = datestr(max(q.Dates));
        fprintf('\n%-19s %s to %s\n','Price History:',firstDay,lastDay);
        fprintf('%-19s %6d samples\n','Samples:',q.Length);
        fprintf('%-19s %6d days\n','Period:',q.Period);
        fprintf('%-19s %6.2f samples/year\n','Sample Rate:',365.25/q.Freq);
        fprintf('%-19s %6.2f %% annualized\n','Volatility:',100*q.Volatility);
        fprintf('%-19s %6.2f %% annualized\n','Mean Log Return:',100*q.MeanLogReturns);
        return
    end

    % Summary table for an object array: header, then one row each
    fprintf(' %-16s %-17s %5s %5s %6s %16s %7s %5s %6s %6s\n', ...
        'Symbol','Name','P/E','DY(%)','Last','52 wk Range ','P(days)','N ', ...
        'LnR(%)','Vol(%)');
    for k = 1:length(q(:))
        item = q(k);
        fprintf('%3d) ',k)
        label = sprintf('%s:%s',item.Symbol,item.Exchange);
        fprintf('%-16s %-17s',label,item.Name);
        fprintf(' %5s',num2str(item.pe,'%5.1f'));
        fprintf(' %5s',num2str(item.div_yield,'%5.2f'));
        fprintf(' %6.2f',item.last_price);
        fprintf(' %6s - %6s', ...
            num2str(item.year_low,'%6.2f'),num2str(item.year_high,'%6.2f'));
        fprintf(' %7d',item.Period);
        fprintf(' %5d',item.Length);
        fprintf(' %6.2f',100*item.MeanLogReturns);
        fprintf(' %6.2f',100*item.Volatility);
        fprintf('\n');
    end
end % disp
function select(q,dates)
% select(q,dates)
% Given an object array of stocks and a vector of dates,
% select(q,dates) establishes a common date vector for all
% elements of the array. If dates are removed from an
% element, the corresponding historical data is removed. If
% dates are inserted, then NaN's are inserted into historical
% data.
%
% select(q)
% Given an object array of stocks, select(q) removes all
% dates with a NaN in the historical prices of any element of
% q.
%
% The elements of q are modified in place; nothing is returned.
% The dates are used directly as row indices below.
% NOTE(review): this assumes every date is a positive whole-number
% serial date - confirm for any caller that passes its own dates.
if isempty(q)
return
end
% Length of Object Array
N = size(q(:),1);
% If dates is not given, then selects dates so there are no
% NaN's in the resulting [q.price]
% (the lower-case property names used throughout this method resolve
% through the class's CaseInsensitiveProperties attribute)
if nargin < 2
dates = q(1).dates(~isnan(q(1).price));
for n = 2:N
dates = intersect(dates,q(n).dates(~isnan(q(n).price)));
end
end
% Largest Date
% (today's serial date number, used as the row count of the arrays)
I = datenum(date);
% find number of maximum number of nonzero historical entries
Nz = sum([q.Length]);
% Allocate sparse array for historical data. Each array uses
% date for row index, object number for column index.
% The rows for the requested dates start out as NaN, so a requested
% date that an element has no data for reads back as NaN.
price = spalloc(I,N,Nz); price(dates,:) = NaN;
open = spalloc(I,N,Nz); open(dates,:) = NaN;
high = spalloc(I,N,Nz); high(dates,:) = NaN;
low = spalloc(I,N,Nz); low(dates,:) = NaN;
close = spalloc(I,N,Nz); close(dates,:) = NaN;
volume = spalloc(I,N,Nz); volume(dates,:) = NaN;
adjclose = spalloc(I,N,Nz); adjclose(dates,:) = NaN;
% For each object, put historical data in sparse arrays indexed
% by date. Then extract the data for dates of interest.
% Statement order matters: every field is scattered using the element's
% ORIGINAL date vector, so the dates must be overwritten last.
for n = 1:N
price(q(n).Dates,n) = q(n).price;
q(n).price = full(price(dates,n));
open(q(n).Dates,n) = q(n).open;
q(n).open = full(open(dates,n));
high(q(n).Dates,n) = q(n).high;
q(n).high = full(high(dates,n));
low(q(n).Dates,n) = q(n).low;
q(n).low = full(low(dates,n));
close(q(n).Dates,n) = q(n).close;
q(n).close = full(close(dates,n));
volume(q(n).Dates,n) = q(n).volume;
q(n).volume = full(volume(dates,n));
adjclose(q(n).Dates,n) = q(n).adjclose;
q(n).adjclose = full(adjclose(dates,n));
% Only now replace the element's dates with the common vector
q(n).dates = dates;
end
end
function union(q)
    % UNION Align every element of q to the union of all their dates.
    %
    %   Collects each date that appears in any element's history, then
    %   passes that vector to select to re-index every element in place.
    %   Does nothing for an empty array.
    if isempty(q)
        return
    end

    % Accumulate the union of dates across all objects
    count = size(q(:),1);
    allDates = q(1).Dates;
    for k = 2:count
        allDates = union(allDates,q(k).Dates);
    end
    select(q,allDates);
end
function intersect(q)
    % INTERSECT Align every element of q to the dates they all share.
    %
    %   Collects the dates present in every element's history, then
    %   passes that vector to select to re-index every element in place.
    %   Does nothing for an empty array.
    if isempty(q)
        return
    end

    % Narrow the date set down to those common to all objects
    count = size(q(:),1);
    sharedDates = q(1).Dates;
    for k = 2:count
        sharedDates = intersect(sharedDates,q(k).Dates);
    end
    select(q,sharedDates);
end
function garch(q)
    % GARCH Fit a GARCH model to the historical log returns and plot it.
    %
    %   Calls garchfit (Econometrics toolbox) on q.LogReturns. The
    %   innovations and sigmas it returns are plotted against the dates
    %   in two stacked panels, and a new figure then shows a quantile
    %   plot of the innovations.
    logReturns = q.LogReturns;
    [Coeff,Errors,LLF,resid,sigma,Summary] = ...
        garchfit(logReturns); %#ok<ASGLU>

    % Upper panel: innovations
    subplot(2,1,1);
    plot(q.Dates,resid);
    title(q.Name);
    ylabel('Innovations');
    datetick('x',12);

    % Lower panel: sigmas
    subplot(2,1,2);
    plot(q.Dates,sigma);
    ylabel('Sigma');
    datetick('x',12);

    % Separate figure: quantile plot of the innovations
    figure;
    qqplot(resid);
end % garch
end % methods
methods(Static)
function lookup(str)
    % lookup(str)
    % Looks up stock symbols using str as a search string and prints
    % one line per match: symbol, name, exchange and type.
    % Typical usage is
    %
    % stock.lookup('Google')
    %
    % Requires the function parse_json to be in the Matlab path.
    %
    % Raises an error when the URL cannot be read or when the response
    % is not wrapped in the expected callback(...) form.
    url = ['http://d.yimg.com/autoc.finance.yahoo.com/autoc?',...
        'query=', strrep(strtrim(str),' ','%20'), ...
        '&callback=','YAHOO.Finance.SymbolSuggest.ssCallback'];
    [sout,status] = urlread(url);
    if ~status
        error('Failed to read url');
    end

    % The response is JSON wrapped in a callback call: name( ... ).
    % Locate the wrapper's parentheses instead of assuming fixed
    % offsets, so trailing characters after the ')' do not break parsing.
    openIdx = find(sout == '(',1,'first');
    closeIdx = find(sout == ')',1,'last');
    if isempty(openIdx) || isempty(closeIdx) || closeIdx <= openIdx
        error('Unexpected response format from symbol lookup.');
    end
    r = parse_json(sout(openIdx+1:closeIdx-1));

    results = r.ResultSet.Result;
    for n = 1:length(results)
        item = results{n};
        fprintf('%-15s',item.symbol);
        fprintf('%-36s',strtrim(item.name));
        % Optional fields are printed as blanks when absent
        if isfield(item,'exchDisp')
            fprintf('%-14s',item.exchDisp);
        else
            fprintf('%-14s',' ');
        end
        if isfield(item,'typeDisp')
            fprintf('%-10s\n',item.typeDisp);
        else
            fprintf('%-10s\n',' ');
        end
    end
end
function demo
% demo
% Demonstrates use of the stock class: looks up symbols matching
% 'ford', then builds a stock object for 'f', displays it and plots it.
% Command echoing is switched on at the start and off at the end.
echo on
% Lookup stock symbols
stock.lookup('ford');
% Display quotes for a selected stock
fprintf('\n\n\n');
f = stock('f');
disp(f);
f.plot;
echo off
end
end
end
%% parse_csv
%
% This is a simple csv parser sufficient to parse records returned from
% Yahoo finance.
function s = parse_csv(str)
    % s = parse_csv(str)
    % Given a string holding one record in .csv format, parses it into a
    % cell array with one entry per field. This function is used to
    % parse the records returned from Yahoo Finance.
    %
    % * Fields delimited by double quotes have the quotes removed and
    %   are always returned as text, so a ticker such as "I", "J" or
    %   "INF" is not mistaken for a number.
    % * Unquoted fields that str2double reads as a real number are
    %   returned as double; any other unquoted field is returned as
    %   text.
    % * Empty fields are returned as empty strings.

    % Trim any leading or trailing white space
    str = strtrim(str);

    % Regular expression parsing of csv string matching quoted,
    % unquoted, and null fields. Return cell array of fields
    s = regexp(str,'\"([^\"]+?)\",?|([^,]+),?|,','match');

    % Clean up each field
    for k = 1:length(s)
        % Remove trailing comma, leading and trailing white space
        v = strtrim(regexprep(s{k},',$',''));

        if length(v) > 1 && v(1) == '"'
            % Quoted field: strip the surrounding quotes, keep as text
            s{k} = v(2:end-1);
            continue
        end

        % Unquoted field: convert when it is a real number. The isreal
        % test rejects text such as 'i' or 'j', which str2double reads
        % as the imaginary unit.
        num = str2double(v);
        if ~isnan(num) && isreal(num)
            s{k} = num;
        else
            s{k} = v;
        end
    end
end % parse_csv
%% parse_json
%
% The parse_json function is taken verbatim from Francois Glineur's
% contribution (File ID #23393) to Matlab Central under terms of the BSD
% license. parse_json is used to parse the response of the yahoo server to
% symbol lookup requests.
% Copyright (c) 2009, François Glineur
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are
% met:
%
% * Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
% * Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in
% the documentation and/or other materials provided with the distribution
%
% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
% IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
% ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
% LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
% POSSIBILITY OF SUCH DAMAGE.
function data = parse_json(string)
% DATA = PARSE_JSON(string)
% This function parses a JSON string and returns a cell array with the
% parsed data. JSON objects are converted to structures and JSON arrays are
% converted to cell arrays.
%
% The outermost JSON value must be an object or an array. Malformed input
% raises an MException with identifier 'JSONparser:invalidFormat' whose
% message shows the text around the failing position.
%
% Implementation note: the nested functions below share the variables
% string, pos, len, esc, index_esc and len_esc with this function; pos is
% the index of the next unread character.
% F. Glineur, 2009
% (inspired by the JSON parser by Joel Feenstra on MATLAB File Exchange
% (http://www.mathworks.com/matlabcentral/fileexchange/20565) but with
% faster handling of strings)
pos = 1;
len = length(string);
% String delimiters and escape characters are identified beforehand to improve speed
% (esc holds the positions of every double quote and backslash; index_esc
% is the cursor into that list)
esc = regexp(string, '["\\]'); index_esc = 1; len_esc = length(esc);
if pos <= len
switch(next_char)
case '{'
data = parse_object;
case '['
data = parse_array;
otherwise
error_pos('Outer level structure must be an object or an array');
end
end
% Parse a JSON object starting at pos into a struct, one field per member.
% An object with no members yields [].
function object = parse_object
parse_char('{');
object = [];
if next_char ~= '}'
while 1
str = parse_string;
if isempty(str)
error_pos('Name of value at position %d cannot be empty');
end
parse_char(':');
val = parse_value;
% Member names are sanitized so they are usable as field names
object.(valid_field(str)) = val;
if next_char == '}'
break;
end
parse_char(',');
end
end
parse_char('}');
end
% Parse a JSON array starting at pos into a cell array of values.
function object = parse_array
parse_char('[');
object = cell(0, 1);
if next_char ~= ']'
while 1
val = parse_value;
object{end+1} = val;
if next_char == ']'
break;
end
parse_char(',');
end
end
parse_char(']');
end
% Consume the expected character c, skipping white space on both sides;
% raise an error when a different character (or end of input) is found.
function parse_char(c)
skip_whitespace;
if pos > len || string(pos) ~= c
error_pos(sprintf('Expected %c at position %%d', c));
else
pos = pos + 1;
skip_whitespace;
end
end
% Return the next non-space character without consuming it, or [] at end
% of input. Any white space before it is skipped.
function c = next_char
skip_whitespace;
if pos > len
c = [];
else
c = string(pos);
end
end
% Advance pos past any white space.
function skip_whitespace
while pos <= len && isspace(string(pos))
pos = pos + 1;
end
end
% Parse a double-quoted JSON string starting at pos and return its
% contents. Text is copied in runs between the precomputed quote and
% backslash positions rather than one character at a time.
function str = parse_string
if string(pos) ~= '"'
error_pos('String starting with " expected at position %d');
else
pos = pos + 1;
end
str = '';
while pos <= len
% Move the cursor to the first quote/backslash at or after pos
while index_esc <= len_esc && esc(index_esc) < pos
index_esc = index_esc + 1;
end
if index_esc > len_esc
% No closing quote remains: take the rest and fall out to the error
str = [str string(pos:end)];
pos = len + 1;
break;
else
str = [str string(pos:esc(index_esc)-1)];
pos = esc(index_esc);
end
switch string(pos)
case '"'
% Closing quote: the string is complete
pos = pos + 1;
return;
case '\'
if pos+1 > len
error_pos('End of file reached right after escape character');
end
pos = pos + 1;
switch string(pos)
case {'"' '\' '/'}
str(end+1) = string(pos);
pos = pos + 1;
case {'b' 'f' 'n' 'r' 't'}
% Let sprintf translate the escape into the control character
str(end+1) = sprintf(['\' string(pos)]);
pos = pos + 1;
case 'u'
if pos+4 > len
error_pos('End of file reached in escaped unicode character');
end
% The six characters of the \uXXXX escape are copied unchanged
str(end+1:end+6) = string(pos-1:pos+4);
pos = pos + 5;
end
otherwise % should never happen
str(end+1) = string(pos);
pos = pos + 1;
end
end
error_pos('End of file while expecting end of string');
end
% Read one number at pos with sscanf, looking at no more than 21
% characters, and advance pos past the characters consumed.
function num = parse_number
[num, one, err, delta] = sscanf(string(pos:min(len,pos+20)), '%f', 1); % TODO : compare with json(pos:end)
if ~isempty(err)
error_pos('Error reading number at position %d');
end
pos = pos + delta-1;
end
% Parse whichever JSON value starts at pos: string, array, object,
% number, true, false or null (null is returned as []). The keywords are
% matched without regard to case.
function val = parse_value
switch(string(pos))
case '"'
val = parse_string;
return;
case '['
val = parse_array;
return;
case '{'
val = parse_object;
return;
case {'-','0','1','2','3','4','5','6','7','8','9'}
val = parse_number;
return;
case 't'
if pos+3 <= len && strcmpi(string(pos:pos+3), 'true')
val = true;
pos = pos + 4;
return;
end
case 'f'
if pos+4 <= len && strcmpi(string(pos:pos+4), 'false')
val = false;
pos = pos + 5;
return;
end
case 'n'
if pos+3 <= len && strcmpi(string(pos:pos+3), 'null')
val = [];
pos = pos + 4;
return;
end
end
% Reached when no case matched or a keyword was misspelled
error_pos('Value expected at position %d');
end
% Throw a 'JSONparser:invalidFormat' exception. msg is a format string
% that receives pos; the message also quotes the input around pos with an
% <error> marker at the failing position.
function error_pos(msg)
% Clamp the context window [pos-15, pos-1] and [pos, pos+20] to the input
poss = max(min([pos-15 pos-1 pos pos+20],len),1);
if poss(3) == poss(2)
poss(3:4) = poss(2)+[0 -1]; % display nothing after
end
msg = [sprintf(msg, pos) ' : ... ' string(poss(1):poss(2)) '<error>' string(poss(3):poss(4)) ' ... '];
ME = MException('JSONparser:invalidFormat', msg);
throw(ME);
end
% Turn a JSON member name into a valid struct field name.
function str = valid_field(str)
% From MATLAB doc: field names must begin with a letter, which may be
% followed by any combination of letters, digits, and underscores.
% Invalid characters will be converted to underscores, and the prefix
% "alpha_" will be added if first character is not a letter.
if ~isletter(str(1))
str = ['alpha_' str];
end
str(~isletter(str) & ~('0' <= str & str <= '9')) = '_';
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment