This is a situation I got stuck in the other day: df -h showed that the partition was 100% full, disrupting many of the applications using that partition.
# df -h /var/
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/VolGroup00-var
3.9G 3.9G 0 100% /var
But when I went into the partition and checked the size using du, I saw that the partition was only around 80% full, with 2.9G of data out of 3.9G.
[root@server1 var]# du -sh
2.9G .
So where are the other files inside /var occupying the size on the hard disk?
To find out, let's list the processes using /var, which can be done with lsof:
# lsof /var
Explanation:
While going through the list of open files, I saw a long list of deleted files which were still being held open by a process.
What are these deleted files?
These are files that were opened by the command shown in the first column, which in our case is rhn_check. The files were deleted as part of the command's normal procedure, but because the command is still running and still has them open, the disk space they occupy is not freed until the process closes them or exits.
# lsof /var | grep -i deleted
rhn_check 31261 root 8u REG 253,2 22082560 327733 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 9u REG 253,2 78848 327737 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 10u REG 253,2 144384 327741 /var/cache/yum/prod-03-mssb-x86_64-server-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 11u REG 253,2 54056960 327748 /var/cache/yum/prod-03-rhel-x86_64-server-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 12u REG 253,2 9275392 327752 /var/cache/yum/prod-03-rhel-x86_64-server-supplementary-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 13u REG 253,2 582656 327756 /var/cache/yum/prod-03-rhn-tools-rhel-x86_64-server-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 14u REG 253,2 34002944 327758 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 15u REG 253,2 33857536 327760 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 16u REG 253,2 23552 327762 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 17u REG 253,2 6144 327765 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 18u REG 253,2 98304 327767 /var/cache/yum/prod-03-mssb-x86_64-server-5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 19u REG 253,2 68608 327769 /var/cache/yum/prod-03-mssb-x86_64-server-5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 20u REG 253,2 227818496 327772 /var/cache/yum/prod-03-rhel-x86_64-server-5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 21uw REG 253,2 489109504 327774 /var/cache/yum/prod-03-rhel-x86_64-server-5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 22r REG 253,2 135095452 327773 /var/cache/yum/prod-03-rhel-x86_64-server-5/other.xml.gz (deleted)
You can also view these deleted files using the below command
Syntax:
lsof +aL1 /file-system
# lsof +aL1 /var
COMMAND PID USER FD TYPE DEVICE SIZE NLINK NODE NAME
rhn_check 31261 root 8u REG 253,2 22082560 0 327733 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 9u REG 253,2 78848 0 327737 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 10u REG 253,2 144384 0 327741 /var/cache/yum/prod-03-mssb-x86_64-server-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 11u REG 253,2 54056960 0 327748 /var/cache/yum/prod-03-rhel-x86_64-server-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 12u REG 253,2 9275392 0 327752 /var/cache/yum/prod-03-rhel-x86_64-server-supplementary-5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 13u REG 253,2 582656 0 327756 /var/cache/yum/prod-03-rhn-tools-rhel-x86_64-server-5-rhel5/primary.xml.gz.sqlite (deleted)
rhn_check 31261 root 14u REG 253,2 34002944 0 327758 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 15u REG 253,2 33857536 0 327760 /var/cache/yum/prod-03-epel-x86_64-server-5-rhel5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 16u REG 253,2 23552 0 327762 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 17u REG 253,2 6144 0 327765 /var/cache/yum/prod-03-likewise-x86_64-client-5-rhel5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 18u REG 253,2 98304 0 327767 /var/cache/yum/prod-03-mssb-x86_64-server-5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 19u REG 253,2 68608 0 327769 /var/cache/yum/prod-03-mssb-x86_64-server-5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 20u REG 253,2 227818496 0 327772 /var/cache/yum/prod-03-rhel-x86_64-server-5/filelists.xml.gz.sqlite (deleted)
rhn_check 31261 root 21uw REG 253,2 489109504 0 327774 /var/cache/yum/prod-03-rhel-x86_64-server-5/other.xml.gz.sqlite (deleted)
rhn_check 31261 root 22r REG 253,2 135095452 0 327773 /var/cache/yum/prod-03-rhel-x86_64-server-5/other.xml.gz (deleted)
Check the total size held by the deleted files (for the first command):
# lsof /var | grep -i deleted | gawk -F" " '{ print $7}' |sort| uniq| awk '{ sum += $1} END { print sum/(1024*1024*1024) }'
0.937209
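So around 0.94 GB of space is held by deleted files. I hope this makes clear the reason for the size difference between the du and df commands: du only counts files that still have a directory entry, while df reports the space actually allocated on the filesystem, including files that are deleted but still open.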
For the second command
# lsof +aL1 /var | awk -F " " '{print $7}' | sort | uniq | awk '{ sum += $1} END { print sum/(1024*1024*1024) }'
0.937209
Solution:
Let us check the other files used by this PID (I have trimmed the output as it was a long one):
# lsof -p 31261
rhn_check 31261 root mem REG 253,0 143144 557343 /lib64/libexpat.so.0.5.0
rhn_check 31261 root mem REG 253,0 372912 950724 /usr/lib64/python2.4/site-packages/M2Crypto/__m2crypto.so
rhn_check 31261 root mem REG 253,0 7120 918933 /usr/lib64/python2.4/lib-dynload/_weakref.so
rhn_check 31261 root mem REG 253,0 34184 919549 /usr/lib64/python2.4/site-packages/_sqlite.so
rhn_check 31261 root mem REG 253,0 41784 919601 /usr/lib64/python2.4/site-packages/_sqlitecache.so
rhn_check 31261 root mem REG 253,0 647608 557099 /lib64/libglib-2.0.so.0.1200.3
rhn_check 31261 root mem REG 253,0 1297104 763802 /usr/lib64/libxml2.so.2.6.26
rhn_check 31261 root mem REG 253,0 25104 920528 /usr/lib64/python2.4/lib-dynload/termios.so
rhn_check 31261 root mem REG 253,0 7280 920527 /usr/lib64/python2.4/lib-dynload/syslog.so
rhn_check 31261 root 0uW REG 253,2 5 163893 /var/run/rhn_check.pid
rhn_check 31261 root 1w FIFO 0,6 901384192 pipe
rhn_check 31261 root 2w FIFO 0,6 901384192 pipe
rhn_check 31261 root 3r REG 253,2 32641024 622598 /var/lib/rpm/Packages
rhn_check 31261 root 4r REG 253,2 344064 622601 /var/lib/rpm/Providename
rhn_check 31261 root 5r REG 253,0 19517 920332 /usr/share/rhn/actions/packages.py
rhn_check 31261 root 6u unix 0xffff8106770ba980 901384670 socket
rhn_check 31261 root 7w REG 253,2 107 32851 /var/log/yum.log
As you can see, there are many other files still in use, and the rhn_check command still seems to be executing. At this point you need to decide which is more important: the disk space or the running service/command. If you go ahead and kill that PID, the service that owns it will die, which may affect your applications and their usage.
In my case the rhn_check command is not very important at this point, so I can go ahead and kill it:
# kill -9 31261
$ df -h /var/
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/VolGroup00-var
3.9G 3.0G 703M 82% /var
A better option for this problem is to restart the affected service. For example, if a service such as named or httpd is holding deleted files open, it is better to restart that service; all the deleted files it holds will then be released. So instead of killing the PID, this can be the better option.
Related Articles:
What is kernel-PAE in Linux?
What is a Kernel in Linux?
What is swappiness and how do we change its value?
What is the difference between POP3 and IMAP?
What is GRUB Boot Loader ?