Created
October 18, 2019 10:57
-
-
Save epifanio/6d0e0d7ad0e63c6cde919a0cce39390a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Recover the list of files, sorted by size, from **all the branches**, and then from **all the branches except MASTER** (blobs already present in the checked-out `HEAD` tree are filtered out)\n", | |
"\n", | |
"```bash\n", | |
"git rev-list --objects --all | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)' | sed -n 's/^blob //p' | sort --numeric-sort --key=2 | cut -c 1-12,41- | $(command -v gnumfmt || echo numfmt) --field=2 --to=iec-i --suffix=B --padding=7 --round=nearest >> sorted_by_size.txt\n", | |
"```\n", | |
"\n", | |
"```bash\n", | |
"git rev-list --objects --all | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)' | sed -n 's/^blob //p' | grep -vF --file=<(git ls-tree -r HEAD | awk '{print $3}') | sort --numeric-sort --key=2 | cut -c 1-12,41- | $(command -v gnumfmt || echo numfmt) --field=2 --to=iec-i --suffix=B --padding=7 --round=nearest >> sorted_by_size_not_in_master.txt\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Trunk looks like:\n", | |
"\n", | |
"```bash\n", | |
"drwxrwxr-x 28 epinux epinux 4096 Oct 17 09:26 config\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 debian\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 etc\n", | |
"drwxrwxr-x 9 epinux epinux 4096 Oct 18 12:42 .git\n", | |
"-rw-rw-r-- 1 epinux epinux 12 Oct 18 05:10 gitignore\n", | |
"drwxrwxr-x 4 epinux epinux 4096 Oct 18 12:49 .git-rewrite\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 install\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 job\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 log\n", | |
"-rw-rw-r-- 1 epinux epinux 6393 Oct 17 09:26 Makefile\n", | |
"drwxrwxr-x 10 epinux epinux 4096 Oct 17 09:26 OSI_HL_AUX\n", | |
"drwxrwxr-x 5 epinux epinux 4096 Oct 17 09:26 OSI_HL_common\n", | |
"drwxrwxr-x 5 epinux epinux 4096 Oct 17 09:26 OSI_HL_Flux\n", | |
"drwxrwxr-x 24 epinux epinux 4096 Oct 17 09:26 OSI_HL_Ice\n", | |
"drwxrwxr-x 5 epinux epinux 4096 Oct 17 09:26 OSI_HL_IT\n", | |
"drwxrwxr-x 9 epinux epinux 4096 Oct 17 09:26 OSI_HL_SST\n", | |
"-rw-rw-r-- 1 epinux epinux 7066 Oct 17 09:26 osisaf_hl_versions.h\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 repro_job\n", | |
"drwxrwxr-x 2 epinux epinux 4096 Oct 17 09:26 templates\n", | |
"```\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"```bash\n", | |
"* master\n", | |
" remotes/origin/AMSR2_L2_dev\n", | |
" remotes/origin/LRSID_devbranch_Uncertainties\n", | |
" remotes/origin/LRSID_devbranch_after5p0\n", | |
" remotes/origin/OSI_HL_Flux_dev\n", | |
" remotes/origin/OSI_HL_Flux_dev@3916\n", | |
" remotes/origin/OSI_HL_Flux_dev@4060\n", | |
" remotes/origin/UbuntuPrecise-migration-branch\n", | |
" remotes/origin/UbuntuTrusty-migration-branch\n", | |
" remotes/origin/cleaning-before-github\n", | |
" remotes/origin/conc_amsr2\n", | |
" remotes/origin/conc_amsr2_esarr\n", | |
" remotes/origin/conc_preprod\n", | |
" remotes/origin/conc_preprod@9500\n", | |
" remotes/origin/conc_test_masks\n", | |
" remotes/origin/conc_test_masks@11201\n", | |
" remotes/origin/dmi_opr_test\n", | |
" remotes/origin/dmi_opr_test@3501\n", | |
" remotes/origin/dmi_opr_test@4060\n", | |
" remotes/origin/filters_oslo_visit\n", | |
" remotes/origin/filters_oslo_visit@11062\n", | |
" remotes/origin/fluxdev_satproj\n", | |
" remotes/origin/fluxdev_satproj_v2\n", | |
" remotes/origin/fm\n", | |
" remotes/origin/ice_conc_generic\n", | |
" remotes/origin/ice_conc_generic@12813\n", | |
" remotes/origin/improvements\n", | |
" remotes/origin/improvements@12272\n", | |
" remotes/origin/lrseaicedrift_newicemask\n", | |
" remotes/origin/lrseaicedrift_newicemask@3059\n", | |
" remotes/origin/lrseaicedrift_newicemask@4060\n", | |
" remotes/origin/lucid-branch\n", | |
" remotes/origin/lucid-branch@3691\n", | |
" remotes/origin/lucid-branch@4060\n", | |
" remotes/origin/medres_drift\n", | |
" remotes/origin/medres_drift@12643\n", | |
" remotes/origin/medres_drift@4059\n", | |
" remotes/origin/rel_3p2_patches\n", | |
" remotes/origin/rel_3p2_patches@4060\n", | |
" remotes/origin/rel_4p0_patches\n", | |
" remotes/origin/rel_4p0_patches@2941\n", | |
" remotes/origin/rel_4p0_patches@2954\n", | |
" remotes/origin/rel_4p0_patches@4060\n", | |
" remotes/origin/rel_4p1_patches\n", | |
" remotes/origin/rel_5p0_patches\n", | |
" remotes/origin/rel_5p1_patches\n", | |
" remotes/origin/rel_5p2_patches\n", | |
" remotes/origin/sicci\n", | |
" remotes/origin/sicci_sid\n", | |
" remotes/origin/sst-ist-l2-viirs-met\n", | |
" remotes/origin/tags/20140213_LRSID_ready_for_v5p0\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_1811200813utc\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_1811200813utc@2224\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_1811200813utc@4058\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_2411200810utc\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_2411200810utc@2232\n", | |
" remotes/origin/tags/LRDRIFT_PITCH_2411200810utc@4058\n", | |
" remotes/origin/tags/LRicedrift_ReprocessingVersion_20091016\n", | |
" remotes/origin/tags/LRicedrift_ReprocessingVersion_20091016@2941\n", | |
" remotes/origin/tags/LRicedrift_ReprocessingVersion_20091016@2954\n", | |
" remotes/origin/tags/LRicedrift_ReprocessingVersion_20091016@3061\n", | |
" remotes/origin/tags/LRicedrift_ReprocessingVersion_20091016@4058\n", | |
" remotes/origin/tags/METNO_routine-it_May2011\n", | |
" remotes/origin/tags/METNO_routine-test_20141124\n", | |
" remotes/origin/tags/initial\n", | |
" remotes/origin/tags/lrsid_normap_reproc_v0.90\n", | |
" remotes/origin/tags/rel-5p2\n", | |
" remotes/origin/tags/rel_3p1\n", | |
" remotes/origin/tags/rel_3p2\n", | |
" remotes/origin/tags/rel_4p0\n", | |
" remotes/origin/tags/rel_4p0@2941\n", | |
" remotes/origin/tags/rel_4p0@4058\n", | |
" remotes/origin/tags/rel_4p1\n", | |
" remotes/origin/tags/rel_5p0\n", | |
" remotes/origin/tags/rel_5p1\n", | |
" remotes/origin/tags/rel_5p2\n", | |
" remotes/origin/tags/sicci-0.1\n", | |
" remotes/origin/trunk\n", | |
" remotes/origin/trunk@4059\n", | |
" remotes/origin/ubuntu-migration-branch\n", | |
" remotes/origin/ubuntu-migration-branch@2518\n", | |
" remotes/origin/ubuntu-migration-branch@4060\n", | |
"```" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#!gist sorted_by_size_not_in_master.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#!gist sorted_by_size.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_files = 'https://gist.githubusercontent.com/epifanio/a650b009531a6ea90806aa30f7622972/raw/723ed5f84d51d258a437568d90f457a93c00dccf/sorted_by_size.txt'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_files_except_master = 'https://gist.githubusercontent.com/epifanio/57881e30e95644cb22ac8e0cfeaaa4cc/raw/d3b8de8f026e09680285878e03568648ab679aaf/sorted_by_size_not_in_master.txt'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# All files sorted by size: one row per blob with its abbreviated object id,\n", | |
"# human-readable size and path, split on runs of whitespace.\n", | |
"# `sep=r'\\s+'` replaces the deprecated `delim_whitespace=True`, and\n", | |
"# `on_bad_lines='skip'` replaces `error_bad_lines=False` (removed in pandas 2.0).\n", | |
"# NOTE(review): a path containing whitespace yields extra fields, so such rows are\n", | |
"# presumably skipped as bad lines and never reach the removal list -- confirm.\n", | |
"im = pd.read_csv(all_files,\n", | |
"                 sep=r'\\s+',\n", | |
"                 header=None,\n", | |
"                 names=['commit', 'size', 'filename'],\n", | |
"                 on_bad_lines='skip')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# All files sorted by size, except the files in the master branch:\n", | |
"# one row per blob with its abbreviated object id, human-readable size and path.\n", | |
"# `sep=r'\\s+'` replaces the deprecated `delim_whitespace=True`, and\n", | |
"# `on_bad_lines='skip'` replaces `error_bad_lines=False` (removed in pandas 2.0).\n", | |
"# NOTE(review): a path containing whitespace yields extra fields, so such rows are\n", | |
"# presumably skipped as bad lines and never reach the removal list -- confirm.\n", | |
"nim = pd.read_csv(all_files_except_master,\n", | |
"                  sep=r'\\s+',\n", | |
"                  header=None,\n", | |
"                  names=['commit', 'size', 'filename'],\n", | |
"                  on_bad_lines='skip')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Number of listed blobs: in all branches, outside master, and the difference.\n", | |
"# Counting the 'commit' column by label gives the same value as `count()[0]`\n", | |
"# without the integer-position lookup on a label index (deprecated in pandas 2.x).\n", | |
"n_all = im['commit'].count()\n", | |
"n_not_in_master = nim['commit'].count()\n", | |
"n_all, n_not_in_master, n_all - n_not_in_master" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# check only the files with size expressed in MiB\n", | |
"nim_mib = nim[nim['size'].str.contains('MiB')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Keep only the blobs whose size is at least the threshold (5 MiB).\n", | |
"# Sizes were formatted by `numfmt --to=iec-i --suffix=B` (e.g. '12MiB', '1.3GiB'),\n", | |
"# so the number and the unit are parsed separately: looking only at the 'MiB'\n", | |
"# rows would silently leave every blob of 1 GiB or more out of the removal list.\n", | |
"MIN_SIZE_MIB = 5\n", | |
"MIB_PER_UNIT = {'MiB': 1, 'GiB': 1024, 'TiB': 1024 ** 2}\n", | |
"size_parts = nim['size'].str.extract(r'^([0-9.]+)([KMGT]iB|B)$')\n", | |
"# B and KiB rows (and unparsable sizes) map to NaN, which compares False below.\n", | |
"size_mib = size_parts[0].astype('float64') * size_parts[1].map(MIB_PER_UNIT)\n", | |
"new_df = nim[size_mib >= MIN_SIZE_MIB]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# check the \"approximate\" list of unique file extensions (suffixes of at most 4 characters)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"set([i.split('/')[-1].split('.')[-1] for i in new_df['filename'] if len(i.split('/')[-1].split('.')[-1]) <=4 ])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fl = list(new_df['filename'].drop_duplicates().values)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"fl[:10]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"new_df['filename'].drop_duplicates().to_csv('file_list.csv', header=False, index=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!gist file_list.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cd /home/epinux/dev/osisaf_migration2/osisaf/" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"!git status" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Rewrite 22dbd9f19b233922b20be39142e3cf5ea9e5858c (1863/11042) (97 seconds passed, remaining 477 predicted) " | |
] | |
} | |
], | |
"source": [ | |
"import shlex\n", | |
"\n", | |
"# Remove every path in `fl` from the whole history in ONE filter-branch pass.\n", | |
"# Calling filter-branch once per file rewrites all commits each time; the saved\n", | |
"# output of this cell shows about ten minutes for a single pass over 11042 commits.\n", | |
"# Each path is shell-quoted because the index filter is evaluated by a shell, and\n", | |
"# the filter itself is quoted again for the `!` shell, so spaces or metacharacters\n", | |
"# in a file name can neither split the path nor inject commands.\n", | |
"# NOTE(review): the whole path list travels in a single argument; for a very long\n", | |
"# list the OS argument-size limit may be hit -- split `fl` into batches in that case.\n", | |
"if fl:\n", | |
"    quoted_paths = ' '.join(shlex.quote(path) for path in fl)\n", | |
"    index_filter = f'git rm --cached --ignore-unmatch -- {quoted_paths}'\n", | |
"    cmd = ('git filter-branch --force '\n", | |
"           f'--index-filter {shlex.quote(index_filter)} '\n", | |
"           '--prune-empty --tag-name-filter cat -- --all')\n", | |
"    !{cmd}" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment