Python crawler controller
Here is my code:
# Standard-library imports, one per line (PEP 8).
import shlex
from subprocess import Popen, PIPE

# Map of human-readable site name -> [crawler script name, log file name].
site_dt = {
    'Site1 Name': ['site1_crawler.py', 'site1_crawler.out'],
    'Site2 Name': ['site2_crawler.py', 'site2_crawler.out'],
}

# Directory where the crawler scripts live.
location = "/home/crawler/"

# File that records the PID of each crawler we start.
# NOTE(review): opened at import time; main() closes it after its loop —
# consider opening it inside main() with a `with` block instead.
pidfp = open('pid.txt', 'w')
def is_running(pname):
    """Return True if a process whose command line contains
    /home/crawler/<pname> appears in the `ps ax` listing.

    The substring check requires the full installation path, so the
    test is not fooled by unrelated processes (or by a grep of the
    same name) that merely mention the bare script name.
    """
    # One subprocess is enough: capture `ps ax` and search in Python
    # instead of piping through grep (which would list itself too).
    ps = Popen(["ps", "ax"], stdout=PIPE)
    output = ps.communicate()[0]
    # communicate() yields bytes on Python 3; decode before the
    # substring test so this works on both 2 and 3.
    if isinstance(output, bytes):
        output = output.decode("utf-8", "replace")
    return ('/home/crawler/' + pname) in output
def main():
    """Launch every crawler in site_dt that is not already running.

    For each site: skip it if its script shows up in `ps`, otherwise
    spawn `python <location><script> -l info` with stdout redirected to
    log/<outname>, and append "<site>: <pid>" to the module-level PID file.
    """
    for name, (script, outname) in site_dt.items():
        print(name)
        if is_running(script):
            print(script, "already running")
            continue
        cmd = "python " + location + script + " -l info"
        outfile = "log/" + outname
        # The child inherits its own copy of the log file descriptor,
        # so the parent can close fp immediately after spawning
        # (the original leaked one open file per crawler).
        with open(outfile, 'w') as fp:
            pid = Popen(shlex.split(cmd), stdout=fp).pid
        print(pid)
        # pid is an int: str() it before concatenating (the original
        # raised TypeError), and write a real newline, not the letter n.
        pidfp.write(name + ": " + str(pid) + "\n")
    # Close the PID file once, after the loop — the original closed it
    # inside the loop and would have crashed on the second site.
    pidfp.close()
# Script entry point — curly quotes in the original were a SyntaxError.
if __name__ == "__main__":
    main()
If you feel that there is scope for improvement, please comment.