Skip to content

Instantly share code, notes, and snippets.

@VALLIS-NERIA
Created May 15, 2019 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VALLIS-NERIA/01536fc5d432c1bac69b6c0d0f7efb12 to your computer and use it in GitHub Desktop.
Save VALLIS-NERIA/01536fc5d432c1bac69b6c0d0f7efb12 to your computer and use it in GitHub Desktop.
Copy files from OneDrive to Azure blob storage
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Security;
using System.Text;
using System.Threading.Tasks;
using System.Web;
using Microsoft.SharePoint.Client;
using Microsoft.WindowsAzure.Storage;
using Microsoft.WindowsAzure.Storage.Blob;
using File = System.IO.File;
namespace Library {
public class OneDriveCopier {
// Destination blob container; resolved in the constructor from destBlobContainerName.
private CloudBlobContainer container;
// Container name passed to GetContainerReference in the constructor.
private string destBlobContainerName;
// Account password; copied character by character into a SecureString for SharePointOnlineCredentials.
private string password;
// Row limit written into the CAML view; also used for the per-round range in SplitOneDrive's log line.
private int roundAmount = 4000;
// Used by CopyOnePage when building the "/personal/.../Documents/" path it removes from each item's FileRef.
private string sharePointUserName;
// URL handed to the ClientContext constructor.
private string siteName;
// Location (created on demand) where CopyOnePage stages files of 2,000,000,000 bytes or more before uploading them.
private string tempDir;
// Login name passed to SharePointOnlineCredentials.
private string userEmail;
// CAML view XML assigned to CamlQuery.ViewXml; built once in the constructor.
private string xml;
/// <summary>
/// Initializes the copier: assigns the connection settings, resolves the destination
/// blob container and builds the CAML view XML used by the paged list queries.
/// </summary>
public OneDriveCopier() {
// NOTE(review): every /* MOSAIC */ below is a placeholder standing where a value expression
// is required; this constructor does not compile until real values are supplied.
this.siteName = /* MOSAIC */;
this.password = /* MOSAIC */;
this.userEmail = /* MOSAIC */;
this.destBlobContainerName = /* MOSAIC */;
this.sharePointUserName = /* MOSAIC */;
this.tempDir = /* MOSAIC */;
var storageConnectionString = /* MOSAIC */;
// Resolve the container reference once; it is reused for every blob created by CopyOnePage.
var storageAccount = CloudStorageAccount.Parse(storageConnectionString);
var blobClient = storageAccount.CreateCloudBlobClient();
this.container = blobClient.GetContainerReference(this.destBlobContainerName);
// Recursive view ordered by FileDirRef then FileLeafRef, listing FileDirRef as its view field,
// and paged with roundAmount as the row limit.
this.xml =
@"<View Scope=""RecursiveAll"">
<Query>
<OrderBy Override='TRUE'>
<FieldRef Name='FileDirRef' />
<FieldRef Name='FileLeafRef' />
</OrderBy>
</Query>
<ViewFields>
<FieldRef Name='FileDirRef' />
</ViewFields>
<RowLimit Paged='TRUE'>" + this.roundAmount +
"</RowLimit></View>";
}
/// <summary>
/// Copies the files found under <paramref name="prefix"/> by running <see cref="CopyOnePage"/>
/// once without a paging token.
/// </summary>
/// <remarks>
/// Because no paging token is supplied, only the first page of the listing (limited by
/// <see cref="roundAmount"/> rows) is processed. For larger folders, enumerate the paging
/// tokens with <see cref="SplitOneDrive"/> and call <see cref="CopyOnePage"/> for each one.
/// </remarks>
/// <param name="prefix">The subdirectory of your OneDrive.
/// Pass <see cref="string.Empty"/> or null to start at the root of the document library;
/// pass "My Documents/Code/" to copy the files in folder "My Documents/Code".
/// </param>
/// <param name="log">The logger.</param>
/// <returns>A <see cref="Task"/> that completes when the page has been copied.</returns>
public async Task DirectCopy(string prefix = null, IMyLogger log = null) =>
    await this.CopyOnePage(prefix, paging: null, log: log);
/// <summary>
/// Walks the listing under <paramref name="prefix"/> in pages of <see cref="roundAmount"/> rows
/// and yields the paging token that starts each page after the first.
/// </summary>
/// <remarks>
/// This is an iterator: nothing is sent to SharePoint until the result is enumerated. The
/// SharePoint context is disposed when enumeration finishes or the enumerator is disposed.
/// </remarks>
/// <param name="prefix">The subdirectory of your OneDrive.
/// Pass <see cref="string.Empty"/> or null to start at the root of the document library;
/// pass "My Documents/Code/" to list the files in folder "My Documents/Code".
/// </param>
/// <param name="log">The logger; may be null.</param>
/// <param name="textWriter">Optional writer that receives one paging token per line.
/// It is flushed after every token and closed once enumeration runs to completion.</param>
/// <returns>The paging tokens, in listing order. Pass each one to <see cref="CopyOnePage"/>;
/// the first page is the one addressed by a null token and is not yielded here.</returns>
public IEnumerable<string> SplitOneDrive(string prefix, IMyLogger log = null, TextWriter textWriter = null) {
    // Both objects are IDisposable; the context is released first, then the password buffer.
    using (var securePassword = new SecureString())
    using (var context = new ClientContext(this.siteName)) {
        foreach (char c in this.password) {
            securePassword.AppendChar(c);
        }
        context.Credentials = new SharePointOnlineCredentials(this.userEmail, securePassword);

        var web = context.Web;
        context.Load(web);
        context.Load(web.Lists);
        context.Load(web, wb => wb.ServerRelativeUrl);
        context.ExecuteQuery();

        // NOTE(review): "文档" is presumably the localized title of the document library on
        // this site; it will need changing for sites created in another language.
        var list = web.Lists.GetByTitle("文档");
        context.Load(list);
        context.ExecuteQuery();

        var folder = web.GetFolderByServerRelativeUrl(web.ServerRelativeUrl + "/Documents/" + prefix);
        context.Load(folder);
        context.ExecuteQuery();
        // The folder's Files collection is intentionally not loaded: nothing below reads it,
        // and the listing itself comes from the paged CAML query.

        ListItemCollectionPosition position = null;
        int round = 1;
        do {
            log?.Info($"Round {round} ({(round - 1) * this.roundAmount} ~ {round * this.roundAmount}) begin.");
            var query = CamlQuery.CreateAllItemsQuery();
            query.ViewXml = this.xml;
            query.FolderServerRelativeUrl = folder.ServerRelativeUrl;
            query.ListItemCollectionPosition = position;

            // Only the position of the next page is needed here, not the items themselves.
            var items = list.GetItems(query);
            context.Load(items, page => page.ListItemCollectionPosition);
            context.ExecuteQuery();

            position = items.ListItemCollectionPosition;
            if (position != null) {
                log?.Info(position.PagingInfo);
                textWriter?.WriteLine(position.PagingInfo);
                // Flush per token so already-discovered pages survive an interrupted run.
                textWriter?.Flush();
                log?.Info($"Round {round} logged.");
                yield return position.PagingInfo;
            }
            round++;
        } while (position != null);
    }
    textWriter?.Close();
}
/// <summary>
/// Copies the files of one listing page under <paramref name="prefix"/> into the blob container.
/// </summary>
/// <remarks>
/// Blobs that already exist are skipped. Files below 2,000,000,000 bytes are streamed through
/// <see cref="CopyStream"/> (those above 100,000,000 bytes are awaited one at a time, the rest
/// run concurrently); larger files are downloaded to <see cref="tempDir"/> and uploaded from there.
/// </remarks>
/// <param name="prefix">The subdirectory of your OneDrive. Make sure to keep consistent with
/// <paramref name="prefix"/> given to <see cref="SplitOneDrive"/>.</param>
/// <param name="paging">The paging token of the page to copy; null or empty selects the first page.</param>
/// <param name="log">The logger; may be null, in which case nothing is logged.</param>
/// <returns>A <see cref="Task"/> that completes when every copy of the page has finished.</returns>
/// <exception cref="AggregateException">One or more of the concurrently running copies failed;
/// thrown after the page summary has been logged.</exception>
public async Task CopyOnePage(string prefix, string paging, IMyLogger log) {
    // Both objects are IDisposable; the context is released first, then the password buffer.
    using (var securePassword = new SecureString())
    using (var context = new ClientContext(this.siteName)) {
        foreach (char c in this.password) {
            securePassword.AppendChar(c);
        }
        context.Credentials = new SharePointOnlineCredentials(this.userEmail, securePassword);

        var web = context.Web;
        context.Load(web);
        context.Load(web.Lists);
        context.Load(web, wb => wb.ServerRelativeUrl);
        context.ExecuteQuery();

        // NOTE(review): "文档" is presumably the localized title of the document library on
        // this site; it will need changing for sites created in another language.
        var list = web.Lists.GetByTitle("文档");
        context.Load(list);
        context.ExecuteQuery();

        var folder = web.GetFolderByServerRelativeUrl(web.ServerRelativeUrl + "/Documents/" + prefix);
        context.Load(folder);
        context.ExecuteQuery();
        // The folder's Files collection is intentionally not loaded: nothing below reads it.

        var position = string.IsNullOrEmpty(paging) ? null : new ListItemCollectionPosition { PagingInfo = paging };
        log?.Info($"page {paging}");

        var query = CamlQuery.CreateAllItemsQuery();
        query.ViewXml = this.xml;
        query.FolderServerRelativeUrl = folder.ServerRelativeUrl;
        query.ListItemCollectionPosition = position;
        var items = list.GetItems(query);
        context.Load(items, page => page.ListItemCollectionPosition);
        context.ExecuteQuery();

        // FileRef is a server-relative path; removing this root leaves a name relative to the prefix.
        // (C# interpolation holes are written {x}; a leading '$' would be emitted literally.)
        string root = $"/personal/{this.sharePointUserName}/Documents/{prefix}";

        var pending = new List<Task>();
        int i = 0;
        foreach (var item in items) {
            i++;
            // NOTE(review): entries whose MetaInfo mentions "hassubdir" are skipped — presumably folders.
            if (((string) item.FieldValues["MetaInfo"]).Contains("hassubdir")) {
                continue;
            }

            var fileRef = (string) item.FieldValues["FileRef"];
            var filePath = fileRef.StartsWith(root, StringComparison.OrdinalIgnoreCase)
                ? fileRef.Substring(root.Length)
                : fileRef;

            var blob = this.container.GetBlockBlobReference(filePath);
            if (blob.Exists()) {
                log?.Info($"Skipping {filePath}");
                continue;
            }

            context.Load(item.File);
            context.ExecuteQuery();

            if (item.File.Length < 2000_000_000) {
                log?.Info($"{i} Copying {filePath}");
                Task copy = this.CopyStream(item, blob, log);
                // Medium-sized files are copied one at a time; small ones are left running concurrently.
                if (item.File.Length > 100_000_000) {
                    await copy;
                }
                pending.Add(copy);
            }
            else if (this.CopyViaTempFile(context, prefix, filePath, blob, i, item.File.Length, log)) {
                // Already finished synchronously; recorded so it is counted in the summary.
                pending.Add(Task.CompletedTask);
            }
        }

        Task all = Task.WhenAll(pending);
        try {
            await all;
        }
        catch (Exception) {
            // Observed here only so that the summary below is always logged;
            // the failure is re-thrown once the summary has been written.
        }

        // IsCompleted is also true for faulted tasks, so count successful completions explicitly.
        int succeeded = pending.Count(t => t.Status == TaskStatus.RanToCompletion);
        if (succeeded == pending.Count) {
            log?.Info($"page {paging} finished. All success.");
        }
        else {
            log?.Error($"page {paging} finished. {succeeded} successes, {pending.Count - succeeded} errors.");
            throw all.Exception ?? new AggregateException(new TaskCanceledException(all));
        }
    }
}

// Copies one file through a temporary file in tempDir: downloads it, uploads it to the blob and
// sets the blob's content type. Returns false when the download failed with a WebException
// (which is logged); the temporary file is removed in every case.
private bool CopyViaTempFile(ClientContext context, string prefix, string filePath, CloudBlockBlob blob, int index, long length, IMyLogger log) {
    var documentPath = "Documents/" + prefix + filePath;
    // No-op when the directory already exists.
    Directory.CreateDirectory(this.tempDir);
    // Path.Combine inserts the separator when tempDir does not end with one.
    var tempFile = Path.Combine(this.tempDir, Path.GetFileName(filePath));

    log?.Info($"{index} Downloading {filePath} ({length / 1024 / 1024} MB)");
    try {
        try {
            DownloadToTempFile(context, documentPath, tempFile);
        }
        catch (WebException) {
            log?.Error($"DOWNLOAD FAIL: {filePath}");
            return false;
        }

        log?.Info($"Uploading {filePath}");
        blob.UploadFromFile(tempFile);
        blob.Properties.ContentType = MimeTypes.GetMimeType(blob.Name);
        blob.SetProperties();
        return true;
    }
    finally {
        // Do not leave multi-gigabyte leftovers behind when the download or upload fails.
        File.Delete(tempFile);
    }
}
/// <summary>
/// Downloads a document with a "get document" call to the site's
/// <c>_vti_bin/_vti_aut/author.dll</c> endpoint and writes the document bytes to
/// <paramref name="tempFilePath"/>.
/// </summary>
/// <remarks>
/// The response starts with an HTML preamble; everything up to and including the first
/// <c>&lt;/html&gt;</c> plus the one byte that follows it is discarded, and the rest is the file.
/// </remarks>
/// <param name="clientContext">An authenticated context whose credentials are
/// <see cref="SharePointOnlineCredentials"/> and whose <c>Web</c> has been loaded.</param>
/// <param name="documentPath">Path of the document relative to the web; a leading "/" is ignored.</param>
/// <param name="tempFilePath">File to create; an existing file is overwritten.</param>
/// <exception cref="WebException">The request failed, or the response did not contain the
/// expected HTML preamble.</exception>
static void DownloadToTempFile(ClientContext clientContext, string documentPath, string tempFilePath) {
    if (documentPath.StartsWith("/", StringComparison.Ordinal)) {
        documentPath = documentPath.Substring(1);
    }

    var targetSite = new Uri(clientContext.Web.Url);
    var spCredentials = (SharePointOnlineCredentials)clientContext.Credentials;
    string authCookieValue = spCredentials.GetAuthenticationCookie(targetSite);
    // Remove the "SPOIDCRL=" name prefix only. TrimStart(char[]) would treat it as a set of
    // characters and could also strip leading characters of the cookie value itself.
    const string cookiePrefix = "SPOIDCRL=";
    if (authCookieValue.StartsWith(cookiePrefix, StringComparison.Ordinal)) {
        authCookieValue = authCookieValue.Substring(cookiePrefix.Length);
    }

    string requestUrl = clientContext.Url + "/_vti_bin/_vti_aut/author.dll";
    string method = HttpUtility.UrlEncode("get document:15.0.0.4455");
    var serviceName = HttpUtility.UrlEncode(clientContext.Web.ServerRelativeUrl);
    documentPath = HttpUtility.UrlEncode(documentPath);
    string oldThemeHtml = "false";
    string force = "true";
    string getOption = "none";
    string timeOut = "0";
    string expandWebPartPages = "true";
    string rpcCallString = string.Format(
        "method={0}&service%5fname={1}&document%5fname={2}&old%5ftheme%5fhtml={3}&force={4}&get%5foption={5}&doc%5fversion=&timeout={6}&expandWebPartPages={7}",
        method, serviceName, documentPath, oldThemeHtml, force, getOption, timeOut, expandWebPartPages);

    var wReq = (HttpWebRequest)WebRequest.Create(requestUrl);
    wReq.Method = "POST";
    wReq.ContentType = "application/x-vermeer-urlencoded";
    wReq.Headers["X-Vermeer-Content-Type"] = "application/x-vermeer-urlencoded";
    wReq.UserAgent = "MSFrontPage/15.0";
    wReq.UseDefaultCredentials = false;
    wReq.Accept = "auth/sicily";
    wReq.Headers["MIME-Version"] = "1.0";
    wReq.Headers["X-FORMS_BASED_AUTH_ACCEPTED"] = "T";
    wReq.Headers["Accept-encoding"] = "gzip, deflate";
    // The request advertises gzip/deflate, so let the framework decode a compressed response;
    // otherwise compressed bytes would be written to the file as-is.
    wReq.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    wReq.Headers["Cache-Control"] = "no-cache";
    wReq.CookieContainer = new CookieContainer();
    wReq.CookieContainer.Add(
        new Cookie("SPOIDCRL",
            authCookieValue,
            String.Empty,
            targetSite.Authority));
    wReq.KeepAlive = true;

    byte[] rpcBody = Encoding.UTF8.GetBytes(rpcCallString);
    using (var requestStream = wReq.GetRequestStream()) {
        requestStream.Write(rpcBody, 0, rpcBody.Length);
    }

    byte[] marker = Encoding.ASCII.GetBytes("</html>");
    // Upper bound for the buffered preamble, so a response without the marker cannot exhaust memory.
    const int maxPreambleBytes = 1024 * 1024;

    using (var response = wReq.GetResponse())
    using (var body = response.GetResponseStream())
    // File.Create truncates; File.OpenWrite would keep the tail of a longer, older file.
    using (var output = File.Create(tempFilePath))
    using (var preamble = new MemoryStream()) {
        var buffer = new byte[32 * 1024];
        bool payloadStarted = false;
        int markerAt = -1;
        int read;
        while ((read = body.Read(buffer, 0, buffer.Length)) > 0) {
            if (payloadStarted) {
                output.Write(buffer, 0, read);
                continue;
            }

            // Buffer the preamble so that a marker split across two reads is still found,
            // and search the raw bytes so text decoding cannot shift the offsets.
            int searchFrom = Math.Max(0, (int)preamble.Length - marker.Length + 1);
            preamble.Write(buffer, 0, read);
            int available = (int)preamble.Length;
            if (markerAt < 0) {
                markerAt = IndexOfBytes(preamble.GetBuffer(), searchFrom, available, marker);
            }

            // The payload starts after the marker and the single byte that follows it.
            int payloadAt = markerAt + marker.Length + 1;
            if (markerAt >= 0 && payloadAt <= available) {
                output.Write(preamble.GetBuffer(), payloadAt, available - payloadAt);
                payloadStarted = true;
            }
            else if (available > maxPreambleBytes) {
                throw new WebException("The download response did not contain the expected HTML preamble.");
            }
        }

        if (!payloadStarted) {
            // Fail loudly instead of leaving an empty file that would be uploaded as the document.
            throw new WebException("The download response ended before the document content started.");
        }
    }
}

// Returns the index of the first occurrence of needle within haystack[start, end), or -1 if there is none.
private static int IndexOfBytes(byte[] haystack, int start, int end, byte[] needle) {
    for (int i = start; i <= end - needle.Length; i++) {
        int matched = 0;
        while (matched < needle.Length && haystack[i + matched] == needle[matched]) {
            matched++;
        }
        if (matched == needle.Length) {
            return i;
        }
    }
    return -1;
}
/// <summary>
/// Streams one SharePoint file into <paramref name="blob"/> and, when a MIME type can be
/// derived from the file name, stores it as the blob's content type.
/// </summary>
/// <remarks>
/// The SharePoint query that opens the stream runs synchronously, before the first await;
/// only the upload and the property update are asynchronous.
/// </remarks>
/// <param name="file">The list item whose file is copied; its <c>FileLeafRef</c> and
/// <c>FileRef</c> field values are read.</param>
/// <param name="blob">The destination blob.</param>
/// <param name="log">The logger; may be null, in which case failures are not logged.</param>
/// <returns>A <see cref="Task"/> that completes when the blob has been written.</returns>
async Task CopyStream(ListItem file, CloudBlockBlob blob, IMyLogger log) {
    try {
        var res = file.File.OpenBinaryStream();
        file.Context.ExecuteQuery();
        using (var fs = res.Value) {
            await blob.UploadFromStreamAsync(fs);
            string mimeType = MimeTypes.GetMimeType((string)file.FieldValues["FileLeafRef"]);
            if (!string.IsNullOrEmpty(mimeType)) {
                blob.Properties.ContentType = mimeType;
                await blob.SetPropertiesAsync();
            }
        }
    }
    catch (Exception e) {
        // Null-conditional: a missing logger must not replace the real failure with a NullReferenceException.
        log?.Error($"FAIL: file: {(string)file.FieldValues["FileRef"]}, Exception: {e.Message}");
        throw;
    }
}
}
}
@Gunnnn
Copy link

Gunnnn commented May 25, 2021

Hello! Please share examples of how to fill in values of connection to OneDrive and BlobStorage in this code, marked as /* MOSAIC */

@AdrianTroy
Copy link

could you make a program similar to Goodsync or Gs Richcopy 360 to migrate OneDrive to Azure blob with a GUI interface?

@VALLIS-NERIA
Copy link
Author

Hello! Please share examples of how to fill in values of connection to OneDrive and BlobStorage in this code, marked as /* MOSAIC */

"siteName": "https://your-org.sharepoint.com/personal/your-name",
password is just your plaintext account password
userEmail is just your account's email address
destBlobContainerName is the name of the destination blob container
sharePointUserName is the same as the last part of siteName

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment