quaxsze/deletion_scripts.sh

## deletion_scripts.sh
#!/bin/bash
#
# Delete files from the datagouv storage directories that are not covered by
# the reference lists found in the current working directory.
#
# Usage: deletion_scripts.sh [-d]
#   -d  Dry run: append deletion candidates to the *_deletion.txt reports
#       without removing anything.
#
# Reference lists (one entry per line):
#   resource_fs.txt  a resource file is kept when its path relative to
#                    $resources_path equals one line exactly
#   avatar_fs.txt    an avatar file is kept when its full path contains one line
#   image_fs.txt     an image file is kept when its full path contains one line
# Reports (appended to): resources_deletion.txt, avatars_deletion.txt,
#                        images_deletion.txt

resources_path="/srv/nfs/datagouv/resources"
avatars_path="/srv/nfs/datagouv/avatars"
images_path="/srv/nfs/datagouv/images"

# Default expansion keeps the comparison valid when no argument is given.
dry_run=0
if [[ "${1:-}" == "-d" ]]; then
    dry_run=1
fi

#######################################
# Remove every regular file under a directory that the reference list does
# not cover, recording each candidate in a report file.
# Globals:   purged_count (written) - number of files actually removed
# Arguments: $1 - directory to scan recursively
#            $2 - reference list, one entry per line
#            $3 - report file; every candidate path is appended to it
#            $4 - "exact": keep a file when its path relative to $1 equals a
#                 line; any other value: keep it when its full path contains
#                 a line
#            $5 - "1" for a dry run (report only); anything else deletes
# Returns:   0 after a complete scan, 1 when the scan was refused or aborted
#######################################
purge_unreferenced() {
    local root=$1 keep_list=$2 report=$3 mode=$4 dry=${5:-0}
    local path status
    purged_count=0

    if [[ ! -d "$root" ]]; then
        printf 'Skipping %s: not a directory\n' "$root" >&2
        return 1
    fi
    # Without a usable list every file would look unreferenced and be removed.
    if [[ ! -s "$keep_list" ]]; then
        printf 'Skipping %s: reference list %s is missing or empty\n' \
            "$root" "$keep_list" >&2
        return 1
    fi

    # NUL-delimited paths survive spaces and newlines in file names.
    while IFS= read -r -d '' path; do
        if [[ "$mode" == "exact" ]]; then
            grep -Fxq -- "${path#"$root/"}" "$keep_list"
        else
            grep -Fq -f "$keep_list" <<< "$path"
        fi
        status=$?
        if (( status == 0 )); then
            continue
        elif (( status > 1 )); then
            # grep itself failed: stop rather than treat the file as orphaned.
            printf 'Aborting %s: cannot read %s\n' "$root" "$keep_list" >&2
            return 1
        fi

        printf '%s\n' "$path" >> "$report"
        if [[ "$dry" == "1" ]]; then
            continue
        fi
        # Count only the files that were really removed.
        if rm -- "$path"; then
            purged_count=$(( purged_count + 1 ))
        fi
    done < <(find "$root" -type f -print0)
    return 0
}

echo "Processing resources"
purge_unreferenced "$resources_path" resource_fs.txt resources_deletion.txt exact "$dry_run"
resources_counter=$purged_count

echo "Resources completed"
echo "Resources deletion: ${resources_counter}"

echo "Processing avatars"
purge_unreferenced "$avatars_path" avatar_fs.txt avatars_deletion.txt substring "$dry_run"
avatars_counter=$purged_count
echo "Avatars completed"
echo "Avatars deletion: ${avatars_counter}"

echo "Processing images"
purge_unreferenced "$images_path" image_fs.txt images_deletion.txt substring "$dry_run"
images_counter=$purged_count
echo "Images completed"
echo "Images deletion: ${images_counter}"

## fs_scripts.py
from udata.app import create_app
from udata.core import init_app
from udata.models import Dataset, CommunityResource, Organization, User, Reuse


def main():
    """Export the storage names still referenced by udata documents.

    Three text files are (re)written in the current working directory, one
    entry per line:

    * ``resource_fs.txt`` -- for every dataset resource and community
      resource whose URL starts with ``https://static.data.gouv.fr``, the URL
      with its first 38 characters removed.
    * ``avatar_fs.txt`` -- for every organization logo and user avatar that
      has a filename, the part of that filename before its first ``.``.
    * ``image_fs.txt`` -- the same, for reuse images.
    """
    static_host = 'https://static.data.gouv.fr'
    # 38 characters: the host followed by '/resources/'.
    # NOTE(review): only the host is checked before slicing, so a static URL
    # outside '/resources/' is cut at the same offset -- confirm none exist.
    strip_len = len(static_host + '/resources/')

    def hosted_urls(dataset_qs, community_qs):
        # Dataset resources come first, then community resources.
        for ds in dataset_qs:
            for res in ds.resources:
                yield res.url
        for extra in community_qs:
            yield extra.url

    def stems(documents, field):
        # Yield the filename up to its first dot, skipping empty filenames.
        for doc in documents:
            stored_name = getattr(doc, field).filename
            if stored_name:
                yield stored_name.split('.')[0]

    flask_app = create_app()
    init_app(flask_app)
    with flask_app.app_context():
        print('Processing resources and community resources.')
        all_datasets = Dataset.objects()
        all_community = CommunityResource.objects()
        with open('resource_fs.txt', 'w') as out:
            for url in hosted_urls(all_datasets, all_community):
                if url.startswith(static_host):
                    out.write(f'{url[strip_len:]}\n')

        print('Processing organizations logos and users avatars.')
        all_orgs = Organization.objects()
        all_users = User.objects()
        with open('avatar_fs.txt', 'w') as out:
            out.writelines(f'{stem}\n' for stem in stems(all_orgs, 'logo'))
            out.writelines(f'{stem}\n' for stem in stems(all_users, 'avatar'))

        print('Processing reuses logos.')
        all_reuses = Reuse.objects()
        with open('image_fs.txt', 'w') as out:
            out.writelines(f'{stem}\n' for stem in stems(all_reuses, 'image'))

        print('Completed.')


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
	#!/bin/bash
	#
	# Delete files from the datagouv storage directories that are not covered by
	# the reference lists found in the current working directory.
	#
	# Usage: deletion_scripts.sh [-d]
	#   -d  Dry run: append deletion candidates to the *_deletion.txt reports
	#       without removing anything.
	#
	# Reference lists (one entry per line):
	#   resource_fs.txt  a resource file is kept when its path relative to
	#                    $resources_path equals one line exactly
	#   avatar_fs.txt    an avatar file is kept when its full path contains one line
	#   image_fs.txt     an image file is kept when its full path contains one line
	# Reports (appended to): resources_deletion.txt, avatars_deletion.txt,
	#                        images_deletion.txt

	resources_path="/srv/nfs/datagouv/resources"
	avatars_path="/srv/nfs/datagouv/avatars"
	images_path="/srv/nfs/datagouv/images"

	# Default expansion keeps the comparison valid when no argument is given.
	dry_run=0
	if [[ "${1:-}" == "-d" ]]; then
	    dry_run=1
	fi

	#######################################
	# Remove every regular file under a directory that the reference list does
	# not cover, recording each candidate in a report file.
	# Globals:   purged_count (written) - number of files actually removed
	# Arguments: $1 - directory to scan recursively
	#            $2 - reference list, one entry per line
	#            $3 - report file; every candidate path is appended to it
	#            $4 - "exact": keep a file when its path relative to $1 equals a
	#                 line; any other value: keep it when its full path contains
	#                 a line
	#            $5 - "1" for a dry run (report only); anything else deletes
	# Returns:   0 after a complete scan, 1 when the scan was refused or aborted
	#######################################
	purge_unreferenced() {
	    local root=$1 keep_list=$2 report=$3 mode=$4 dry=${5:-0}
	    local path status
	    purged_count=0

	    if [[ ! -d "$root" ]]; then
	        printf 'Skipping %s: not a directory\n' "$root" >&2
	        return 1
	    fi
	    # Without a usable list every file would look unreferenced and be removed.
	    if [[ ! -s "$keep_list" ]]; then
	        printf 'Skipping %s: reference list %s is missing or empty\n' \
	            "$root" "$keep_list" >&2
	        return 1
	    fi

	    # NUL-delimited paths survive spaces and newlines in file names.
	    while IFS= read -r -d '' path; do
	        if [[ "$mode" == "exact" ]]; then
	            grep -Fxq -- "${path#"$root/"}" "$keep_list"
	        else
	            # A list entry is a bare stem, so it is searched inside the path;
	            # comparing the whole path for equality would never match.
	            grep -Fq -f "$keep_list" <<< "$path"
	        fi
	        status=$?
	        if (( status == 0 )); then
	            continue
	        elif (( status > 1 )); then
	            # grep itself failed: stop rather than treat the file as orphaned.
	            printf 'Aborting %s: cannot read %s\n' "$root" "$keep_list" >&2
	            return 1
	        fi

	        printf '%s\n' "$path" >> "$report"
	        if [[ "$dry" == "1" ]]; then
	            continue
	        fi
	        # Count only the files that were really removed.
	        if rm -- "$path"; then
	            purged_count=$(( purged_count + 1 ))
	        fi
	    done < <(find "$root" -type f -print0)
	    return 0
	}

	echo "Processing resources"
	purge_unreferenced "$resources_path" resource_fs.txt resources_deletion.txt exact "$dry_run"
	resources_counter=$purged_count

	echo "Resources completed"
	echo "Resources deletion: ${resources_counter}"

	echo "Processing avatars"
	purge_unreferenced "$avatars_path" avatar_fs.txt avatars_deletion.txt substring "$dry_run"
	avatars_counter=$purged_count
	echo "Avatars completed"
	echo "Avatars deletion: ${avatars_counter}"

	echo "Processing images"
	purge_unreferenced "$images_path" image_fs.txt images_deletion.txt substring "$dry_run"
	images_counter=$purged_count
	echo "Images completed"
	echo "Images deletion: ${images_counter}"
	from udata.app import create_app
	from udata.core import init_app
	from udata.models import Dataset, CommunityResource, Organization, User, Reuse


	def main():
	    """Write the storage names referenced by udata documents to text files.

	    Creates or overwrites, in the current working directory:

	    * ``resource_fs.txt`` -- one line per dataset resource or community
	      resource whose URL starts with ``https://static.data.gouv.fr``,
	      holding the URL without its first 38 characters.
	    * ``avatar_fs.txt`` -- one line per organization logo or user avatar
	      with a filename, holding the filename up to its first ``.``.
	    * ``image_fs.txt`` -- the same, for reuse images.
	    """
	    app = create_app()
	    init_app(app)
	    with app.app_context():
	        print('Processing resources and community resources.')

	        datasets = Dataset.objects()
	        community_resources = CommunityResource.objects()
	        with open('resource_fs.txt', 'w') as f:
	            for dataset in datasets:
	                for resource in dataset.resources:
	                    if resource.url.startswith('https://static.data.gouv.fr'):
	                        # 38 == len('https://static.data.gouv.fr/resources/')
	                        fs_name = resource.url[38:]
	                        f.write(f'{fs_name}\n')

	            for community_resource in community_resources:
	                if community_resource.url.startswith('https://static.data.gouv.fr'):
	                    fs_name = community_resource.url[38:]
	                    f.write(f'{fs_name}\n')

	        print('Processing organizations logos and users avatars.')
	        orgs = Organization.objects()
	        users = User.objects()
	        with open('avatar_fs.txt', 'w') as f:
	            for org in orgs:
	                if org.logo.filename:
	                    # Keep only the text before the first dot.
	                    split_filename = org.logo.filename.split('.')
	                    f.write(f'{split_filename[0]}\n')

	            for user in users:
	                if user.avatar.filename:
	                    split_filename = user.avatar.filename.split('.')
	                    f.write(f'{split_filename[0]}\n')

	        print('Processing reuses logos.')
	        reuses = Reuse.objects()
	        with open('image_fs.txt', 'w') as f:
	            for reuse in reuses:
	                if reuse.image.filename:
	                    split_filename = reuse.image.filename.split('.')
	                    f.write(f'{split_filename[0]}\n')

	        print('Completed.')


	# Run the export only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()