Skip to content

Instantly share code, notes, and snippets.

@quaxsze
Last active September 30, 2020 11:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quaxsze/bbf2ab4f02b0299bb8e401b069c5e862 to your computer and use it in GitHub Desktop.
Save quaxsze/bbf2ab4f02b0299bb8e401b069c5e862 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Remove files found under the storage roots below that are not listed in the
# reference files resource_fs.txt, avatar_fs.txt and image_fs.txt, all read
# from the current working directory.
#
# Usage: pass -d as the first argument to only record the candidates in the
# *_deletion.txt files without removing anything.

# Storage roots scanned by the three sections of this script.
resources_path="/srv/nfs/datagouv/resources"
avatars_path="/srv/nfs/datagouv/avatars"
images_path="/srv/nfs/datagouv/images"
# --- Resources ---------------------------------------------------------------
# Delete every file under $resources_path whose path relative to that root is
# not an exact line of resource_fs.txt. Each candidate is appended to
# resources_deletion.txt; with -d as first argument nothing is removed.
echo "Processing resources"
resources_counter=0

# Without the reference list every file would look orphaned and be deleted,
# so refuse to continue rather than wipe the whole tree.
if [[ ! -r resource_fs.txt ]]
then
    echo "resource_fs.txt is missing or unreadable, aborting" >&2
    exit 1
fi

# find takes the starting path BEFORE the expression. Paths are streamed
# NUL-delimited on fd 3 so names containing whitespace survive intact and
# stdin stays available to the commands run inside the loop.
while IFS= read -r -d '' -u 3 f
do
    rel_path=${f#"$resources_path/"}
    if ! grep -Fxq -- "$rel_path" resource_fs.txt
    then
        printf '%s\n' "$f" >> resources_deletion.txt
        # ${1:-} keeps the test valid when the script is run with no argument.
        if [[ "${1:-}" == "-d" ]]
        then
            continue
        fi
        # Only count files that were actually removed.
        if rm -- "$f"
        then
            resources_counter=$(( resources_counter + 1 ))
        fi
    fi
done 3< <(find "$resources_path" -type f -print0)
echo "Resources completed"
echo "Resources deletion: ${resources_counter}"
# --- Avatars -----------------------------------------------------------------
# Delete every file under $avatars_path whose path contains none of the lines
# of avatar_fs.txt as a substring. Each candidate is appended to
# avatars_deletion.txt; with -d as first argument nothing is removed.
echo "Processing avatars"
avatars_counter=0

# Without the reference list every file would look orphaned and be deleted,
# so refuse to continue rather than wipe the whole tree.
if [[ ! -r avatar_fs.txt ]]
then
    echo "avatar_fs.txt is missing or unreadable, aborting" >&2
    exit 1
fi

# find takes the starting path BEFORE the expression. Paths are streamed
# NUL-delimited on fd 3 so names containing whitespace survive intact and
# stdin stays available to the commands run inside the loop.
while IFS= read -r -d '' -u 3 f
do
    # One fixed-string grep replaces the per-line shell loop: it succeeds when
    # any line of avatar_fs.txt occurs literally inside the path, without
    # interpreting glob characters or backslashes found in the list.
    if ! grep -Fq -f avatar_fs.txt <<< "$f"
    then
        printf '%s\n' "$f" >> avatars_deletion.txt
        # ${1:-} keeps the test valid when the script is run with no argument.
        if [[ "${1:-}" == "-d" ]]
        then
            continue
        fi
        # Only count files that were actually removed.
        if rm -- "$f"
        then
            avatars_counter=$(( avatars_counter + 1 ))
        fi
    fi
done 3< <(find "$avatars_path" -type f -print0)
echo "Avatars completed"
echo "Avatars deletion: ${avatars_counter}"
# --- Images ------------------------------------------------------------------
# Delete every file under $images_path whose path contains none of the lines
# of image_fs.txt as a substring. Each candidate is appended to
# images_deletion.txt; with -d as first argument nothing is removed.
echo "Processing images"
images_counter=0

# Without the reference list every file would look orphaned and be deleted,
# so refuse to continue rather than wipe the whole tree.
if [[ ! -r image_fs.txt ]]
then
    echo "image_fs.txt is missing or unreadable, aborting" >&2
    exit 1
fi

# find takes the starting path BEFORE the expression. Paths are streamed
# NUL-delimited on fd 3 so names containing whitespace survive intact and
# stdin stays available to the commands run inside the loop.
while IFS= read -r -d '' -u 3 f
do
    # One fixed-string grep replaces the per-line shell loop: it succeeds when
    # any line of image_fs.txt occurs literally inside the path, without
    # interpreting glob characters or backslashes found in the list.
    if ! grep -Fq -f image_fs.txt <<< "$f"
    then
        printf '%s\n' "$f" >> images_deletion.txt
        # ${1:-} keeps the test valid when the script is run with no argument.
        if [[ "${1:-}" == "-d" ]]
        then
            continue
        fi
        # Only count files that were actually removed.
        if rm -- "$f"
        then
            images_counter=$(( images_counter + 1 ))
        fi
    fi
done 3< <(find "$images_path" -type f -print0)
echo "Images completed"
echo "Images deletion: ${images_counter}"
from udata.app import create_app
from udata.core import init_app
from udata.models import Dataset, CommunityResource, Organization, User, Reuse
# URL prefix of files served from the resources storage. Its length (38) is
# the offset the path relative to the storage root starts at.
_RESOURCES_URL_PREFIX = 'https://static.data.gouv.fr/resources/'


def _resource_fs_names(resources):
    """Yield the storage-relative path of each resource hosted under the resources prefix."""
    for resource in resources:
        url = resource.url
        # Check the full prefix that gets stripped: a URL on the same host but
        # outside /resources/ would otherwise be cut at an arbitrary offset.
        if url and url.startswith(_RESOURCES_URL_PREFIX):
            yield url[len(_RESOURCES_URL_PREFIX):]


def _image_basenames(documents, field):
    """Yield, for each document with a file in `field`, its filename up to the first dot."""
    for document in documents:
        filename = getattr(document, field).filename
        if filename:
            yield filename.split('.')[0]


def _write_lines(handle, lines):
    """Write each item of `lines` to `handle`, one per line."""
    handle.writelines(f'{line}\n' for line in lines)


def main():
    """Dump the file names still referenced by the database.

    Creates (or overwrites) three files in the current working directory:

    * ``resource_fs.txt``: path, relative to the resources prefix, of every
      dataset resource and community resource hosted under
      ``https://static.data.gouv.fr/resources/``;
    * ``avatar_fs.txt``: filename up to its first dot for every organization
      logo and user avatar;
    * ``image_fs.txt``: filename up to its first dot for every reuse image.
    """
    app = create_app()
    init_app(app)
    with app.app_context():
        print('Processing resources and community resources.')
        with open('resource_fs.txt', 'w') as f:
            for dataset in Dataset.objects():
                _write_lines(f, _resource_fs_names(dataset.resources))
            _write_lines(f, _resource_fs_names(CommunityResource.objects()))

        print('Processing organizations logos and users avatars.')
        with open('avatar_fs.txt', 'w') as f:
            _write_lines(f, _image_basenames(Organization.objects(), 'logo'))
            _write_lines(f, _image_basenames(User.objects(), 'avatar'))

        print('Processing reuses logos.')
        with open('image_fs.txt', 'w') as f:
            _write_lines(f, _image_basenames(Reuse.objects(), 'image'))

        print('Completed.')


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment