Module Command
[hide private]

Source Code for Module Command

   1  """ 
   2  Command.py implementation of shell like file manipulation functions.                                       
   3  (c) 2009 Michel J. Anders 
   4   
   5  Version: 0.01a 
   6   
   7  This program is free software: you can redistribute it and/or modify 
   8  it under the terms of the GNU General Public License as published by 
   9  the Free Software Foundation, either version 3 of the License, or 
  10  (at your option) any later version. 
  11   
  12  This program is distributed in the hope that it will be useful, 
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  15  GNU General Public License for more details. 
  16   
  17  You should have received a copy of the GNU General Public License 
  18  along with this program.  If not, see <http://www.gnu.org/licenses/>. 
  19   
  20  Many of the file manipulation functions are already available in the module C{shutil} but they almost 
  21  always take a single src argument, whereas this implementation enhances a number of 
  22  commands with the possibility to pass more than one src file. 
  23   
  24  Also implemented are a number of building blocks and actual functions to mimic 
  25  shell-like behaviour in the use of pipes. E.g. it is possible to write:: 
  26   
  27      cat('*.data','*.txt') | grep('oyster') > stdout 
  28   
  29  or even this:: 
  30       
  31      ls -a | grep('foobar') -v --ignorecase | sort -r 
  32   
  33  The above is made possible with a couple of tricks: 
  34       
  35      1. overloading the C{__sub__()} and C{__neg__()} operators (for long and short options) 
  36      2. preassigning instances to variables (so you can use C{ls} instead of C{ls()}); 
  37          this was already used in U{this recipe <http://code.activestate.com/recipes/276960/>}  
  38      3. defining C{__call__()} (so e.g. C{ls('aaa')} yields a new ls instance) 
  39      4. redefining C{__repr__()} (so the result of a pipeline is shown automatically when working interactively) 
  40      5. and of course redefining C{__or__()} (best seen in U{this recipe<http://code.activestate.com/recipes/576756/>}). 
  41   
  42  Note that this is syntactical sugar only: the 'functions' above are implemented 
  43  as classes and do not spawn subprocesses (although functions are provided that 
  44  DO spawn subprocesses). 
  45   
  46  A less technical introduction can be found U{here<../using-command.html>}. 
  47  """ 
  48   
  49  # system modules 
  50  from time import time 
  51  import time 
  52  import os 
  53  import os.path 
  54  import shutil 
  55  import sys 
  56  import re 
  57  from glob import glob 
  58  import traceback 
  59  import logging 
  60  import subprocess 
  61  from collections import defaultdict 
  62  import itertools 
  63  from copy import deepcopy 
  64   
  65  # package modules 
  66  import Varsub 
  67  import SshUpdate 
  68   
  69  # we don't export these by default, you have to use from Command import <name> 
  70  __utils__=['mux','muxnodst','bunch','expand','lopen', 
  71  'flatten','copy3','copytree2','linebuffer','inputfilterbuffer', 
  72  'outputfilterbuffer','pype','filter'] 
  73   
  74  # these are pipe enabled classes 
  75  __pipeable__=['cat','grep','egrep', 
  76  'sort','tee','uniq','infile','outfile', 
  77  'linecount','ls','stat','echo', 
  78  'callout','callpipe','replace', 
  79  'stdin','stdout','tolist'] 
  80   
  81  # these are non pipe enabled functions 
  82  __nonpipeable__=['touch','cp', 
  83  'sshupdate','rm','mkdir','mv', 
  84  'substitute','call','callout','callpipe', 
  85  'wget','pwd','chmod','cd'] 
  86   
  87  # these are options currently defined on some commands 
  88  __options__=['a','i','c','v','n'] 
  89   
  90  __all__=__pipeable__+__nonpipeable__+__options__ 
  91   
  92  # convenience shortcut 
  93   
  94  stdin=sys.stdin 
  95  stdout=sys.stdout 
# utility functions and classes

def mux(f,*args,**kw):
    """
    Multiplex a two-argument function over any number of source files.

    After expanding 'args' (see C{expand}), the last name of the resulting
    list is taken as dst and f(src,dst) is called once for every remaining
    name. So, say you have a function A(src,dst), a call like::

        mux(A,'*.txt','*.doc','summary.dat')

    is equivalent to::

        A('a.txt','summary.dat')
        A('b.txt','summary.dat')
        A('c.doc','summary.dat')

    depending on the actual files that match of course.

    Directory names are kept during expansion; otherwise an existing
    destination directory would be dropped from the list and the last
    source file would silently be used as destination.

    @param f: a function (callable) that takes two string arguments.
    @param args: a (possibly nested) list of strings or glob patterns.
    @keyword ignore: if True, exceptions raised by f() are collected
        instead of propagated.
    @keyword ignore_pattern: handed to f() as its C{ignore} keyword (the
        name C{ignore} itself is consumed by mux()).
    @returns: None if no errors were collected, otherwise a list of
        (src, dst, message) tuples.
    @raise TypeError: if fewer than two names remain after expansion.
    """
    names=expand(args,directories=True)
    ignore=kw.pop('ignore',False)
    # validate before popping so an empty list gives TypeError, not IndexError
    if len(names)<2:
        raise TypeError('Wrong number of arguments')
    dst=names.pop(-1)
    if 'ignore_pattern' in kw:
        kw['ignore']=kw.pop('ignore_pattern')
    errors=[]
    for src in names:
        try:
            f(src,dst,**kw)
        except Exception as why:
            if not ignore:
                raise
            errors.append((src,dst,str(why)))
    return errors or None
141
def muxnodst(f,*args,**kw):
    """
    Multiplex a one-argument function over any number of files.

    After expanding 'args' (see C{expand}), f(src) is called once for
    every resulting name. So, say you have a function A(src), a call like::

        muxnodst(A,'*.txt','*.doc')

    is equivalent to::

        A('a.txt')
        A('b.txt')
        A('c.doc')

    depending on the actual files that match of course.

    Directory names are kept during expansion, so functions that operate
    on directories (e.g. a recursive remove) actually receive them.

    @param f: a function (callable) that takes one string argument.
    @param args: a (possibly nested) list of strings or glob patterns.
    @keyword ignore: if True, exceptions raised by f() are collected
        instead of propagated.
    @keyword ignore_pattern: handed to f() as its C{ignore} keyword.
    @returns: None if no errors were collected, otherwise a list of
        (src, message) tuples.
    @raise TypeError: if no names remain after expansion.
    """
    names=expand(args,directories=True)
    ignore=kw.pop('ignore',False)
    if len(names)<1:
        raise TypeError('Wrong number of arguments')
    if 'ignore_pattern' in kw:
        kw['ignore']=kw.pop('ignore_pattern')
    errors=[]
    for src in names:
        try:
            f(src,**kw)
        except Exception as why:
            if not ignore:
                raise
            errors.append((src,str(why)))
    return errors or None
181
def bunch(*args,**kw):
    """
    Process a number of files one by one and write the results to a file.

    Note that this is different from the mux() function::

        mux()   operates on src,dst filename-pairs
        bunch() operates on the contents of several src files and writes to a dst file

    The last name in the (flattened) argument list is the destination,
    all others are sources. Names are not glob-expanded.

    A processor is passed by the keyword parameter C{processor} and should
    be a class that implements the blockprocess interface::

        __init__()          initializes a blockprocessor,
        process(iterator)   iterates over iterator (typically a file object or a list of lines)
        end()               ends the processing of input
        results()           an iterable that returns results.

    @param args: a (possibly nested) list of filenames, destination last.
    @keyword processor: blockprocessor class; defaults to C{_copy}
        (NOTE(review): C{_copy} is expected to be defined elsewhere in
        this module).
    """
    names=flatten(list(args))
    dst=names.pop(-1)
    processor=kw.pop('processor',_copy)()
    for name in names:
        with open(name,'rb') as source:
            processor.process(source)
    processor.end()
    # 'with' guarantees the destination is closed even if results() or write() fails
    with open(dst,'wb') as target:
        for line in processor.results():
            target.write(line)
218
def _xglob(arg,showhidden=False,hidenonexistent=False):
    """
    An augmented version of glob.glob()

    @returns: a list of filenames that match arg. If nothing matches the
        list is empty when hidenonexistent is True, otherwise it holds
        arg itself as its only element.
    @keyword showhidden: if True and arg starts with '*' or '?', matching
        files that start with '.' are returned as well
    @keyword hidenonexistent: if False (the default) arg is returned
        unchanged (as a one element list) when nothing matches

    The latter is useful to define two different behaviours:

    e.g. touch('aaa') creates a file if 'aaa' doesn't exist, but
    ls('aaa') returns nothing if 'aaa' does not exist.
    """
    matches=glob(arg)
    if showhidden:
        if arg.startswith('?'):
            matches.extend(glob('.'+arg[1:]))
        elif arg.startswith('*'):
            matches.extend(glob('.'+arg))
    if not matches and not hidenonexistent:
        # always hand back a list, as documented, never a bare string
        return [arg]
    return matches
243
def expand(*args,**kw):
    """
    Flatten a list of filenames, directory names
    and glob patterns to a single flat list of filenames.

    Example:

    C{expand('filea',['fileb','filec'],'*.txt')}

    might result in (depending on the content of the globbed dir):

    C{['filea','fileb','filec','a.txt','b.txt']}

    Every (flattened) argument is passed through C{_xglob}.

    @param args: a (possibly nested) list of strings or glob patterns.
    @keyword directories: if True names of directories are included in the result.
    @keyword showhidden: passed on to C{_xglob}.
    @keyword hidenonexistent: passed on to C{_xglob}.
    @returns: a list of strings (i.e. filenames although they need not exist.)
    """
    directories=kw.pop('directories',False)
    showhidden=kw.pop('showhidden',False)
    hidenonexistent=kw.pop('hidenonexistent',False)

    globbed=[_xglob(pattern,showhidden=showhidden,hidenonexistent=hidenonexistent)
             for pattern in flatten(list(args))]

    names=[]
    for name in flatten(globbed):
        if directories or not os.path.isdir(name):
            names.append(name)
    return names
270
def lopen(*args):
    """
    Expand a (possibly nested) list of strings and globs
    and open all resulting filenames. Return a single
    iterator that will iterate over all lines in all
    opened files.

    Example::

        for line in lopen('*.txt'):
            print line,

    will print all lines of all text files in the
    current directory.

    All files are opened (in binary mode) when lopen() is called, so a
    missing file is reported immediately. Each file is closed as soon as
    its last line has been produced; if opening one of the files fails,
    the ones already opened are closed again.

    @param args: a (possibly nested) list of strings or glob patterns.
    @returns: an iterator over the lines of all files.
    """
    handles=[]
    try:
        for name in expand(*args):
            handles.append(open(name,'rb'))
    except Exception:
        for handle in handles:
            handle.close()
        raise

    def lines():
        try:
            for handle in handles:
                for line in handle:
                    yield line
                handle.close()
        finally:
            # also covers an iterator that is abandoned halfway
            for handle in handles:
                handle.close()

    return lines()
287
def flatten(x):
    """
    Return a (possibly nested) list of iterables as a
    single flat list. Strings are *not* flattened.
    For examples see expand().

    Works on both Python 2 and Python 3: on Python 3 (where
    C{basestring} does not exist) C{str} and C{bytes} are the types
    that are treated as atoms.

    @param x: an iterable whose elements may themselves be iterables.
    @returns: a new flat list.
    """
    try:
        atoms=(basestring,)
    except NameError:
        # Python 3: str and bytes are iterable but must stay in one piece
        atoms=(str,bytes)
    result=[]
    for el in x:
        if hasattr(el,"__iter__") and not isinstance(el,atoms):
            result.extend(flatten(el))
        else:
            result.append(el)
    return result
301
# building block functions additional to shutil

def copy3(src, dst):
    """
    Copy data and all stat info, but only if src is newer ("cp -pu src dst").

    If dst is a directory, the file is copied into it under the basename
    of src. src counts as newer when the destination does not exist or
    when its modification time is later than that of the destination.

    @param src: name of the source file.
    @param dst: name of the destination file or directory.
    @returns: 1 if file is copied, 0 otherwise
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))

    if os.path.exists(target):
        up_to_date = not (os.stat(src).st_mtime > os.stat(target).st_mtime)
        if up_to_date:
            return 0

    shutil.copyfile(src, target)
    shutil.copystat(src, target)
    return 1
321
def copytree2(src, dst, symlinks=False, ignore_pattern=None, update=False, ignore=None):
    """
    Recursively copy a directory tree using copy2() (or copy3() when updating).

    The destination directory is created if it does not exist yet; an
    existing destination directory is reused.

    If exception(s) occur while copying, a shutil.Error is raised at the
    end with a list of (src, dst, reason) tuples.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore_pattern argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree2(), and `names` which is the list of
    `src` contents, as returned by os.listdir()::

        callable(src, names) -> ignored_names

    Since copytree2() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional update flag ensures that files
    are only copied if the source is newer than the
    destination or if the destination does not exist.

    Note that this is a rather straightforward reimplementation
    of the shutil.copytree() function present in python
    2.x and 3.x including most of the documentation.

    @keyword ignore: alias for ignore_pattern (the name shutil.copytree()
        uses, and the name under which mux() forwards it); only consulted
        when ignore_pattern is None.
    @returns: the number of files copied.
    @raise shutil.Error: if one or more files could not be copied.
    """
    if ignore_pattern is None:
        ignore_pattern = ignore

    names = os.listdir(src)
    if ignore_pattern is not None:
        ignored_names = ignore_pattern(src, names)
    else:
        ignored_names = set()

    files_copied = 0

    if not os.path.isdir(dst):
        try:
            os.makedirs(dst)
        except OSError:
            # when updating, carry on; the per-file errors are reported below
            if not update:
                raise

    # WindowsError only exists on Windows
    try:
        windows_error = WindowsError
    except NameError:
        windows_error = None

    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                os.symlink(os.readlink(srcname), dstname)
                files_copied += 1
            elif os.path.isdir(srcname):
                files_copied += copytree2(srcname, dstname, symlinks,
                                          ignore_pattern, update)
            elif update:
                files_copied += copy3(srcname, dstname)
            else:
                shutil.copy2(srcname, dstname)
                files_copied += 1
            # XXX What about devices, sockets etc.?
        # catch the Error from the recursive copytree2 so that we can
        # continue with other files; it must be tested first because on
        # Python 3 shutil.Error is itself an OSError
        except shutil.Error as err:
            errors.extend(err.args[0])
        except (IOError, os.error) as why:
            errors.append((srcname, dstname, str(why)))
    try:
        shutil.copystat(src, dst)
    except OSError as why:
        if windows_error is not None and isinstance(why, windows_error):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        raise shutil.Error(errors)
    return files_copied
409
# helper classes to implement a shell-like way of coding.

class stacker(object):
    """
    A mixin that accumulates options and provides a factory for instances of the same class.

    stacker saves the list of arguments and the dictionary of keywords. It also offers a callable
    interface that acts as a factory of new instances of a class. When called it combines
    all stored arguments and keywords with additional ones. This facilitates the following
    pattern::

        RefToMyObjectInstance -option | ...

    Here option is a reference to an instance of the class option. What happens in this
    example is this (pseudocode)::

        RefToMyObjectInstance = MyObject(option1=value)
        RefToMyObjectInstance -option | ...

        RefToMyObjectInstance.__sub__(option2)
            calls:
        RefToMyObjectInstance.__call__(option2.name=option2.value)
            results in:
        an (anonymous) object of the same class as RefToMyObjectInstance with
        both options (option1 and option2) set.
    """
    def __init__(self,*args,**kw):
        self._options=dict(kw)
        self._args=list(args)
        object.__init__(self)

    def __sub__(self,right):
        """
        Return a new instance with the option 'right' added.

        @param right: an instance of the option class.
        @raise NotImplementedError: if right is not an option.
        """
        if not isinstance(right,option):
            raise NotImplementedError
        return self(**{right.name:right.value})

    def __call__(self,*args,**kw):
        """
        Facilitates using an instance as a factory for the class it belongs to.

        This way we can create an instance of a class with the same
        name as a class to be used as a shortcut when calling
        without arguments while creating separate anonymous
        instances when calling with arguments.

        Example: for a class 'ls' we can now do this in
        Command.py::

            ls=ls()

        This way we can now write the following::

            from Command import *

            ls | sort > stdout

        while we can still write::

            ls('*.txt') | sort(reverse=True) > stdout

        Extra arguments and options are accumulated, which
        allows us to write (with the help of the __sub__
        operator defined in this class)::

            ...| grep('aaa') -i -v | ...

        Here a grep instance is created first and then a new grep
        instance is created with the -i option added, and so on.

        New positional arguments come before the stored ones. A keyword
        given in this call takes precedence over a stored option of the
        same name. If this instance has a 'buffer' attribute, the new
        instance receives a deep copy of it.

        @returns: a new instance of the same class.
        """
        options=dict(getattr(self,'_options',{}))
        # explicitly supplied keywords win over the ones stored earlier
        options.update(kw)
        arguments=list(args)
        arguments.extend(getattr(self,'_args',[]))
        new=self.__class__(*arguments,**options)
        if hasattr(self,'buffer'):
            new.buffer=deepcopy(self.buffer)
        return new
491
class linebuffer(object):
    """
    A container you can write() lines to
    and which provides an iterator to iterate
    over the buffered contents.

    Iterating consumes the buffer: every item that is
    produced is removed from it.

    It is a base class for line-by-line filter
    components that facilitate a pype pattern.

    This base class just copies without filtering.
    Descendants should override either write()
    or __iter__(). See 'inputfilterbuffer' and 'grep'
    for an actual implementation.
    """
    def __init__(self,*args,**kw):
        # arguments are accepted for the benefit of subclasses and ignored here
        object.__init__(self)

    def _buffer(self):
        """Create the (list) buffer on first use."""
        if hasattr(self,'buffer'):
            return
        self.buffer=[]

    def write(self,data):
        """Append data to the buffer."""
        self._buffer()
        self.buffer.append(data)

    def __iter__(self):
        """Yield the buffered items first-in first-out, removing them."""
        self._buffer()
        while self.buffer:
            yield self.buffer.pop(0)

tolist=linebuffer()
class inputfilterbuffer(linebuffer):
    """
    A linebuffer that filters incoming data.

    Every item written is passed through the filter function and the
    value returned by that function is what gets buffered::

        inputfilterbuffer(filter=lambda x:x)

    @keyword filter: function taking one argument and returning a single item;
        defaults to the identity function.
    """
    def __init__(self,**kw):
        identity=lambda x:x
        self.filter=kw.get('filter',identity)
        linebuffer.__init__(self,**kw)

    def write(self,data):
        """Buffer the filtered version of data."""
        self._buffer()
        filtered=self.filter(data)
        self.buffer.append(filtered)
539
class outputfilterbuffer(linebuffer):
    """
    a.k.a. blockbuffer: gather input lines and process them en bloc before producing output.

    On the first iteration the complete buffer is handed to the filter's
    process() method and the filter's results() are stored; iteration
    then consumes those stored results.

    @keyword filter: an instance of a class with 2 methods:
            - C{process(self,data)} stores the data
            - C{results(self)} returns results. To be useful in a pipeline this
              returned result should be a list (items are popped from the front).
        When absent (or false) a pass-through filter is used.
    """
    class _nullblock:
        """Default filter: returns the data unchanged."""
        def process(self,databuffer):
            self.b=databuffer
        def results(self):
            return self.b

    def __init__(self,**kw):
        chosen=kw.get('filter',None)
        if not chosen:
            chosen=outputfilterbuffer._nullblock()
        self.filter=chosen
        self.first=True
        linebuffer.__init__(self,**kw)

    def __iter__(self):
        if self.first:
            # process the gathered block exactly once
            self._buffer()
            self.filter.process(self.buffer)
            self.first=False
            self.results=self.filter.results()
        while self.results:
            yield self.results.pop(0)
569
class option(object):
    """
    An abstraction of a (named) option with a value.

    To be used with a stacker object: C{cmd -opt} creates a new
    command instance with keyword C{opt.name} set to C{opt.value}.
    """
    def __init__(self,name,value=True):
        object.__init__(self)
        self.name,self.value=name,value

    def __neg__(self):
        """
        Gobbles up a unary minus by returning the option itself.

        Facilitates long options (C{cmd --opt}).
        """
        return self

    def __call__(self,value):
        """
        Return a new option with the same name but another value.

        Allows for ...| cmd -c(3) |...
        """
        cls=self.__class__
        return cls(name=self.name,value=value)

# predefined single letter options
i=option('i')
v=option('v')
n=option('n')
a=option('a')
c=option('c')
class pype(object):
    """
    A mixin enabling a class that implements
    the linebuffer interface (i.e. is iterable and,
    when used on the right hand side, has a write() method)
    to participate in a shell-like pipeline.
    """
    def __init__(self,*args,**kw):
        # arguments are accepted for the benefit of subclasses and ignored here
        object.__init__(self)

    def __or__(self,other):
        """
        The pipe operator: writes every item of this
        object to the righthand side object and returns
        that object (so pipes can be chained).
        """
        write=other.write
        for item in self:
            write(item)
        return other

    def __gt__(self,other):
        """
        Like __or__ but intended as a terminator: returns None.
        """
        self.__or__(other)
        return None

    def __str__(self):
        """Concatenate all items; note that this consumes the content."""
        pieces=[piece for piece in self]
        return ''.join(pieces)

    def __repr__(self):
        """Same as __str__, so a pipeline is shown when working interactively."""
        return str(self)
631
# classes implementing the pype interface

class cat(pype,stacker):
    """
    Copies the contents of all files given as
    argument and makes it available to the next
    component in the pipeline.

    The files are opened (see C{lopen}) when the instance is created.

    example: cat('*.txt') | grep('aaa') > sys.stdout
    """
    def __init__(self,*args):
        self.iterable=lopen(args)
        pype.__init__(self,*args)
        # was 'stacker.__init(...)': a name-mangled, non-existing attribute
        stacker.__init__(self,*args)

    def __iter__(self):
        return self.iterable
649
class grep(pype,stacker,inputfilterbuffer):
    """
    Copy all lines offered to it in a pipeline
    that contain one of its arguments (plain substrings)
    and make them available to the next
    component in the pipeline.

    Lines that are not selected are replaced by empty strings.

    example: cat('*.txt') | grep('aaa') > sys.stdout

    @keyword v: if True, select the lines that do *not* match.
    @keyword i: if True, ignore differences in lower and uppercase symbols.
    """
    def __init__(self,*args,**kw):
        self.inverse=kw.get('v',False)
        self.ignore=kw.get('i',False)

        self.f=str
        if self.ignore:
            args=[a.lower() for a in args]
            self.f=lambda x:str(x).lower()
        # bind only now, so the lower-cased patterns are the ones searched for
        self.args=args

        def find(x):
            y=self.f(x)
            found=any(a in y for a in self.args)
            if found ^ self.inverse:
                return x
            return ''

        kw['filter']=find
        pype.__init__(self,*args,**kw)
        stacker.__init__(self,*args,**kw)
        inputfilterbuffer.__init__(self,**kw)
678
class egrep(pype,stacker,inputfilterbuffer):
    """
    Copy all lines offered to it in a pipeline
    that match one of its regular expression arguments
    and make them available to the next
    component in the pipeline.

    Lines that are not selected are replaced by empty strings.

    example: cat('*.txt') | egrep(r'^ab*c$') > sys.stdout

    @keyword v: if True, select the lines that do *not* match.
    @keyword i: if True, match case-insensitively.
    """
    def __init__(self,*args,**kw):
        self.inverse=kw.get('v',False)
        self.ignore=kw.get('i',False)

        # let the regex engine handle case: lower-casing a pattern would
        # corrupt escapes such as \W or \S
        flags=re.IGNORECASE if self.ignore else 0
        # a real list: the patterns are used again for every line
        self.args=[re.compile(a,flags) for a in args]
        self.f=str

        def find(x):
            y=self.f(x)
            found=any(pattern.search(y) for pattern in self.args)
            if found ^ self.inverse:
                return x
            return ''

        kw['filter']=find
        pype.__init__(self,*args,**kw)
        stacker.__init__(self,*args,**kw)
        # inputfilterbuffer.__init__ accepts keywords only
        inputfilterbuffer.__init__(self,**kw)
707
class sortclass(pype,stacker,outputfilterbuffer):
    """Sort input lines and produce the sorted lines as output.

    Example::

        cat('*.dat') | sort(reverse=True) > stdout

    @keyword n: if True, compare lines as floating point numbers.
    @keyword i: if True, ignore differences in lower and uppercase symbols
        (only used when n is not set).
    @keyword r: if True, do a reverse sort.
    @keyword reverse: long form of r.
    """
    class dosort:
        '''Auxiliary class. Implements the actual sorting via a process interface.
        '''
        def __init__(self,**kw):
            # 'reverse' is the spelling used in the documented example
            self.reverse=bool(kw.get('r',False) or kw.get('reverse',False))
            self.ignore=kw.get('i',False)
            self.numeric=kw.get('n',False)

        def process(self,data):
            self.data=data

        def results(self):
            keyf=None
            if self.numeric:
                keyf=float
            elif self.ignore:
                keyf=lambda line:line.lower()
            # sorts in place; key= is available on Python 2 and 3 alike
            self.data.sort(key=keyf,reverse=self.reverse)
            return self.data

    def __init__(self,**kw):
        kw['filter']=sortclass.dosort(**kw)
        pype.__init__(self,**kw)
        stacker.__init__(self,**kw)
        outputfilterbuffer.__init__(self,**kw)

sort=sortclass() # see pype.__call__
class tee(pype,inputfilterbuffer):
    """Copy input to output and to any files specified as arguments

    Example::

        cat('*.dat') | tee('all.dat') > stdout

    @param args: names of the files to write to (see C{outfile});
        the files are opened when the instance is created.
    """
    def __init__(self,*args,**kw):
        self.outfiles=[outfile(a) for a in args]

        def copy_to_files(x):
            for o in self.outfiles:
                o.write(x)
            return x

        kw['filter']=copy_to_files
        pype.__init__(self,*args,**kw)
        # tee derives from inputfilterbuffer (not outputfilterbuffer),
        # whose __init__ accepts keywords only
        inputfilterbuffer.__init__(self,**kw)
761
class uniqclass(pype,stacker,linebuffer):
    """
    Return unique lines only, optionally prefixed with a linecount.

    Only adjacent duplicates are detected, so this assumes sorted input.

    @keyword c: if True, prefix every line with the number of times it occurred.
    @keyword i: if True, ignore differences in lower and uppercase symbols
        when comparing lines.
    """
    def __init__(self,**kw):
        self.count=kw.get('c',False)
        self.ignore=kw.get('i',False)
        if self.count: self.linecount=[]
        pype.__init__(self,**kw)
        stacker.__init__(self,**kw)
        linebuffer.__init__(self,**kw)

    def _same(self,left,right):
        """Compare two lines, honouring the 'i' option."""
        if self.ignore:
            return str(left).lower()==str(right).lower()
        return left==right

    def write(self,data):
        self._buffer()
        # duplicates are dropped whether or not they are being counted
        if self.buffer and self._same(self.buffer[-1],data):
            if self.count:
                self.linecount[-1]+=1
            return
        self.buffer.append(data)
        if self.count:
            self.linecount.append(1)

    def __iter__(self):
        self._buffer()
        while len(self.buffer):
            if self.count:
                # the count is an int and must be converted before concatenation
                yield str(self.linecount.pop(0))+' '+self.buffer.pop(0)
            else:
                yield self.buffer.pop(0)

uniq=uniqclass() # see: pype.__call__
class infile(pype):
    """
    Open a single named file (in binary mode) and make it
    available to a shell-like pipeline.
    Example::

        infile('my.txt') | grep('the') > stdout

    An infile can only be used as a source: writing to it
    raises NotImplementedError.
    """
    def __init__(self,filename):
        self.iterable=open(filename,'rb')
        pype.__init__(self,filename)

    def __iter__(self): return self.iterable

    # accept the data argument the pipe operator passes, so the intended
    # NotImplementedError is raised instead of a TypeError
    def write(self,data=None): raise NotImplementedError
817
class outfile(object):
    """
    Open a single named file for writing (in binary mode) and make it
    available as the receiving end of a shell-like pipeline.
    The special name '-' stands for sys.stdout.
    Example::

        infile('my.txt') | grep('the') > outfile('the.txt')
    """
    def __init__(self,filename):
        use_stdout=(filename=='-')
        self.file=sys.stdout if use_stdout else open(filename,'wb')
        # expose the write method of the underlying file directly
        self.write=self.file.write
        object.__init__(self)
833
834 835 -class linecountclass(pype,stacker,outputfilterbuffer):
836 """ 837 An example of a blockprocessor. 838 839 It produces all sorts of statistics 840 for consumed text. Example:: 841 842 infile('my.txt') | linecount() > stdout 843 844 The instance acts as its own filter: it implements process() and results() itself. Lines are stripped and empty lines are skipped. By default results() reports the number of files, lines and words, the longest/average/median line length (in words per line) and word length, and up to ten of the most frequently used words with their count and percentage. @keyword l: if True, results() returns the number of lines only. @keyword w: if True, results() returns the number of words only. @keyword i: if True, lines are converted to lowercase before they are processed. """
845 - def __init__(self,**kw):
846 self.linesonly=kw.get('l',False) 847 self.wordsonly=kw.get('w',False) 848 self.ignore=kw.get('i',False) 849 self.lines=0 850 self.files=0 851 self.words=0 852 self.wordlength=defaultdict(int) 853 self.linelength=defaultdict(int) 854 self.wordfreq=defaultdict(int) 855 kw['filter']=self 856 pype.__init__(self,**kw) 857 stacker.__init__(self,**kw) 858 outputfilterbuffer.__init__(self,**kw)
859 860 _wordpattern=re.compile(r'\W+') 861
862 - def _split(self,line):
863 return re.split(linecount._wordpattern,line)
864
865 - def process(self,reader):
866 for line in reader: 867 line=line.strip() 868 if self.ignore: line=line.lower() 869 if len(line)==0:continue 870 wordsinline=0 871 if not self.linesonly: 872 wsl=self._split(line) 873 if not self.wordsonly: 874 for word in wsl: 875 word=word.strip() 876 if len(word)==0:continue 877 word=word.lower() 878 self.wordfreq[word]+=1 879 self.wordlength[len(word)]+=1 880 self.words+=1 881 wordsinline+=1 882 else: 883 self.words+=1 884 self.linelength[wordsinline]+=1 885 self.lines+=1 886 self.files+=1
887
888 - def _stats(self,dd):
889 hi=max(dd) 890 yield('longest',float(hi)) 891 avg=sum(length*freq for length,freq in dd.items())/float(sum(dd.values())) 892 yield('average',avg) 893 total=[dd[i] for i in range(hi+1)] 894 half=0 895 s=sum(total)/2.0 896 for i in range(1,hi+1): 897 total[i]+=total[i-1] 898 if total[i]>=s: 899 half=i 900 break 901 yield ('median',float(half))
902
903 - def _freq(self):
904 l=sorted(list(self.wordfreq.items()),key=lambda x:x[1],reverse=True) 905 n=min(len(l),10) 906 for i in range(n): 907 yield(l[i][0],l[i][1],100*l[i][1]/float(self.words))
908
909 - def results(self):
910 if self.linesonly: return [str(self.lines)+'\n'] 911 if self.wordsonly: return [str(self.words)+'\n'] 912 return [ "%12s: %d\n"%('files',self.files), 913 "%12s: %d\n"%('lines',self.lines), 914 "%12s: %d\n"%('words',self.words), 915 "linelength\n"]+["%12s: %.1f\n"%lv 916 for lv in self._stats(self.linelength)]+["wordlength\n"]+["%12s: %.1f\n"%lv 917 for lv in self._stats(self.wordlength)]+["most frequently used words\n"]+["%12s: %5d %.1f\n"%wnf 918 for wnf in self._freq()]
919 920 linecount=linecountclass() # see: pype.__call__ 921 wc=linecount
class lsclass(pype,stacker):
    """
    Generate a list of matching file and
    directory names and make this available
    to the next component in a pipeline.

    If no arguments are given, '*' is assumed.

    Example::
        ls('*.txt') > outfile('listing')

    @param args: a (possibly nested) list of strings or glob patterns.
    @keyword a: if True, names starting with a '.' are listed as well.
    """

    def __init__(self,*args,**kw):
        # the default '*' is applied in __iter__ only: storing it here would
        # make it accumulate, so that ls('*.txt') lists '*' as well
        self.all=kw.get('a',False)
        pype.__init__(self,*args,**kw)
        stacker.__init__(self,*args,**kw)

    # the names are expanded again on every iteration, so an ls
    # instance can be reused
    def __iter__(self):
        args=self._args
        if len(args)==0: args=['*']
        names=expand(args,directories=True,hidenonexistent=True,showhidden=self.all)
        return iter([f+'\n' for f in names])

ls=lsclass() # see: pype.__call__
class pwdclass(pype):
    """
    Make the name of the current directory available to the pipeline
    (as a single item, without a trailing newline).
    """

    def __iter__(self):
        here=os.getcwd()
        return iter([here])

pwd=pwdclass()
class echo(pype,stacker,linebuffer):
    """
    Make elements of a (possibly nested) list available to a pipeline.

    Every element is converted to a string and gets a newline appended.
    """
    def __init__(self,*args,**kw):
        for base in (pype,stacker,linebuffer):
            base.__init__(self,*args,**kw)
        lines=[]
        for item in flatten(args):
            lines.append(str(item)+'\n')
        self.buffer=lines
967
class statclass(pype,stacker,inputfilterbuffer):
    """
    Find filesystem attributes for a list of files.

    Every filename written to it is replaced by a line holding the mode
    bits, the filename, the size and the ctime of that file. Files that
    cannot be stat'ed are shown with a '?' instead.

    @keyword n: if True, sizes are shown in a folded form (K, M or G).
    """
    @staticmethod
    def pctime(epoch):
        """Format a timestamp (seconds since the epoch) as local time."""
        return time.strftime("%Y%m%d %H:%M",time.localtime(epoch))

    @staticmethod
    def modebits(n):
        """Return an 'ls -l' like representation of the st_mode value n."""
        bits=['---','--x','-w-','-wx', 'r--', 'r-x','rw-', 'rwx']
        o=n&7
        n>>=3
        g=n&7
        n>>=3
        u=n&7
        n>>=3
        d=' '
        if n&32: d='d' # directory
        a=' '
        # NOTE(review): after the shifts this bit corresponds to S_IFREG
        # (regular file), so the '+' appears for every regular file -- confirm
        # whether 'extended access control' is really what was intended.
        if n&64: a='+' # extend access control
        return d+bits[u]+bits[g]+bits[o]+a

    @staticmethod
    def foldsize(size):
        """Return size as a string, folded to G, M or K units when large enough."""
        for n,s in zip((9,6,3),('G','M','K')):
            if size>10**n:
                return '%.1f%s'%(size/10.0**n,s)
        # too small to fold: previously this fell through and returned None
        return str(size)

    def statfilter(self,filename):
        """Return the formatted stat line for filename."""
        filename=filename.rstrip()
        s='%11s %-30s\n'%('?',filename)
        try:
            st=os.stat(filename)
            size=st.st_size
            if self._options.get('n',False):
                size=statclass.foldsize(size)
            s='%11s %-30s %10s %s\n'%(statclass.modebits(st.st_mode),filename,size,statclass.pctime(st.st_ctime))
        except (IOError,OSError):
            # os.stat reports a missing file as OSError; keep the '?' line
            pass
        return s

    def __init__(self,**kw):
        kw['filter']=self.statfilter
        pype.__init__(self,**kw)
        stacker.__init__(self,**kw)
        inputfilterbuffer.__init__(self,**kw)

stat=statclass() # see: pype.__call__
# commands that operate on file(name)s and
# or directories.

def touch(*args,**kw):

    """
    Set the access and modification time
    of files to the current time,
    creating empty files for files that
    do not exist.

    @param args: one or more filenames
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @returns: whatever muxnodst() returns for the collected errors.
    """

    def touch_one(name):
        if not os.path.exists(name):
            # create an empty file
            open(name,'wb').close()
        else:
            os.utime(name,None)

    return muxnodst(touch_one,*args,**kw)
def cp(*args,**kw):
    """
    Copy files and/or directories.

    The last name is the destination, all others are sources (see C{mux}).

    @param args: one or more filenames
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @keyword recursive: if True contents of directories are copied as well (via copytree2).
    @keyword preserve: if True modification times of copied objects are retained (shutil.copy2).
    @keyword update: copy only if src newer than dst (copy3).
    @keyword ignore_pattern: see copytree2 or shutil.copytree
    @returns: the result of mux(): None, or a list of collected errors.
    """

    # pop() instead of a conditional del: an explicitly passed False must
    # not end up as a keyword argument of the copy function either
    recursive=kw.pop('recursive',False)
    preserve=kw.pop('preserve',False)
    update=kw.pop('update',False)

    if recursive:
        if update: kw['update']=True
        return mux(copytree2,*args,**kw)
    if update:
        return mux(copy3,*args,**kw)
    if preserve:
        return mux(shutil.copy2,*args,**kw)
    return mux(shutil.copy,*args,**kw)
1074
def sshupdate(user,host,*args,**kw):
    """
    Update files and/or directories over a
    secure connection to
    a destination on host if they are newer than
    their counterpart in the destination.

    Newer is defined as having a newer
    modification time or when the
    destination does not exist.

    If a password is required a prompt is
    issued. If non-interrupted operation
    is required your ssh client configuration
    should be set up to work with certificates.
    See 'paramiko' and 'Braid.SshUpdate' for
    details.

    All the work is delegated to C{SshUpdate.sshupdate}; if
    C{SshUpdate.available} is false an error is logged instead.

    @param user: username to log in on sshserver.
    @type user: str
    @param host: hostname or ip-address of sshserver
    @type host: str
    @param args: one or more filenames
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @returns: nothing.
    """

    if not SshUpdate.available:
        logging.error('sshupdate not available')
        return
    SshUpdate.sshupdate(user,host,*args,**kw)
1107
def rm(*args,**kw):
    """
    Remove files and/or directories.

    With C{recursive} each argument is removed with C{shutil.rmtree},
    otherwise with C{os.remove}.

    @param args: file/directory name
    @type args: str
    @keyword recursive: if True directory contents are removed as well.
    @keyword ignore: if True all errors are ignored.
    @returns: a (possibly empty) list of errors.
    """

    # pop() so that an explicit recursive=False is not forwarded to muxnodst
    if kw.pop('recursive',False):
        return muxnodst(shutil.rmtree,*args,**kw)
    # the standard shutil module has no remove(); plain file removal is os.remove
    return muxnodst(os.remove,*args,**kw)
1122
def mkdir(*args,**kw):
    """
    Create directories.

    With C{recursive} each argument is created with C{os.makedirs},
    otherwise with C{os.mkdir}.

    @param args: directory name
    @type args: str
    @keyword recursive: if True missing intermediate directories are created as well.
    @keyword ignore: if True all errors are ignored.
    @return: a (possibly empty) list of errors.
    """

    # pop() so that an explicit recursive=False is not forwarded to muxnodst
    if kw.pop('recursive',False):
        return muxnodst(os.makedirs,*args,**kw)
    return muxnodst(os.mkdir,*args,**kw)
1138
def cd(path):
    """
    Make C{path} the current working directory of this process.

    @param path: the relative or absolute path to the new directory.
    """
    target=path
    os.chdir(target)
1146
def chmod(mode,*args):
    """
    Change the read, write and execute bits for directories or files.

    The resulting permission bits are computed from C{mode} alone and
    set with C{os.chmod}; they replace the existing bits, they are not
    merged with them.

    @param mode: a comma separated list of mode assignments.

    Mode assignments have the following form C{item=modes}. Item is one
    or more of::
        u : means user
        g : means group
        o : means other
        a : shortcut for ugo

    Modes is one or more of::
        r : read
        w : write
        x : execute (means accessible for directories)

    Example::

        chmod('ug=rw,o=r', 'filea.txt', 'fileb.txt')

    Means make filea.txt and fileb.txt readable for others and also writeable
    for the user and the group.

    @param args: one or more filenames, passed through C{expand}.
    @type args: str
    @raise ValueError: if an assignment does not have the form C{item=modes}.
    """
    bits=0
    for part in mode.split(','):
        (who,perms)=part.split('=')
        # build the rwx triple in the 'other' position, shift it per class below
        triple=0
        if 'x' in perms: triple|=stat.S_IXOTH
        if 'r' in perms: triple|=stat.S_IROTH
        if 'w' in perms: triple|=stat.S_IWOTH
        everyone='a' in who
        # OR instead of add: naming a class twice (e.g. 'a=r,u=r') must not
        # carry into neighbouring bits
        if everyone or 'o' in who:
            bits|=triple
        if everyone or 'g' in who:
            bits|=triple<<3
        if everyone or 'u' in who:
            bits|=triple<<6
    for a in expand(args):
        os.chmod(a,bits)
1188
def mv(*args,**kw):
    """
    Move or rename files and/or directories by handing C{shutil.move}
    together with all arguments to C{mux}.

    @param args: file/directory names
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @return: a (possibly empty) list of errors.
    """

    mover=shutil.move
    return mux(mover,*args,**kw)
1200
def filter(*args,**kw):
    """
    Copy the contents of one or more source files to destination
    while passing each line through a filter function.

    The filter copies lines unchanged by default, see
    'substitute()' for a more meaningful application.

    Note: source filenames prefixed with a minus
    sign '-' are processed but not written to
    the destination.

    @param args: files, last one is destination.
    @type args: str
    @keyword process: callable applied to every line; its return value is
        what gets written. Defaults to the identity function.
    @keyword ignore: if True all errors are ignored.
    @return: nothing.
    """

    args=shutil2.flatten(list(args))

    process=kw.get('process',lambda x:x)
    ignore=kw.get('ignore',False)

    dst=args.pop(-1)
    try:
        outf=open(dst,'wb')
        # try/finally so that neither handle leaks when reading,
        # processing or writing fails halfway
        try:
            for src in args:
                output=True

                if src.startswith('-'):
                    # process-only source: strip the marker, write nothing
                    src=src[1:]
                    output=False

                inf=open(src,'rb',8192)
                try:
                    for line in inf:
                        if output:
                            outf.write(process(line))
                        else:
                            process(line)
                finally:
                    inf.close()
        finally:
            outf.close()
    except Exception:
        # Exception rather than a bare except: 'ignore' should not swallow
        # KeyboardInterrupt or SystemExit
        if not ignore:
            raise
1246
def substitute(*args,**kw):
    """
    Copy the contents of one or more source files to destination
    while scanning each line for variable
    definitions and variable references.

    This is 'filter()' with the C{process} keyword set to the C{process}
    method of a fresh C{Varsub.varsub} instance.

    Note: source filenames prefixed with a minus
    sign '-' are processed but not written to
    the destination. This provides for simple
    inclusion of files with variable definitions.

    Variables are defined in the following way:

    $NAME=value where NAME is a combination of
    letters and digits and value a sequence of
    characters, possibly enclosed in quotes.

    Variables may be referenced in two ways:

    C{$NAME} followed by whitespace, or

    C{${NAME}} which can appear anywhere.

    Example::

        $COLOUR=white
        $COMMENT="off white with a yellow tinge"
        Let's paint the wall ${COLOUR}, ($COMMENT).

    For a more exact syntax refer to 'Braid.Varsub'.

    @param args: files, last one is destination.
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @return: nothing.
    """
    v=Varsub.varsub()
    kw['process']=v.process
    # call this module's own filter(); it is the function that consumes
    # the 'process' keyword set above
    filter(*args,**kw)
1286
def wget(*args,**kw):
    """
    Copy the contents of one or more urls to a single local file.
    The urls are fetched in order and their contents concatenated.

    TODO: pype

    @param args: urls, last one is destination file
    @type args: str
    @keyword ignore: if True all errors are ignored.
    @return: nothing.
    """

    try:
        from urllib import urlopen
    except ImportError:
        # Python 3 moved urlopen to urllib.request
        from urllib.request import urlopen

    args=flatten(list(args))

    try:
        dst=args.pop(-1)
        outf=open(dst,'wb')
        # try/finally so that neither the file nor the connection leaks
        # when a download fails halfway
        try:
            for src in args:
                inf=urlopen(src)
                try:
                    s=inf.read(8192)
                    while s:
                        outf.write(s)
                        s=inf.read(8192)
                finally:
                    inf.close()
        finally:
            outf.close()
    except Exception:
        # Exception rather than a bare except: 'ignore' should not swallow
        # KeyboardInterrupt or SystemExit
        if not kw.get('ignore',False):
            raise
1317
class replace(pype,stacker,inputfilterbuffer):
    """
    Copy lines from src to a destination file while replacing occurrences of pattern.
    """

    def __init__(self,pattern,replacement,*args,**kw):
        """
        Build the per-line substitution function, store it as
        C{self.process}, put it in C{kw['filter']} and initialise the
        three base classes with C{args} and C{kw}.

        @param pattern: a regular expression.
        @type pattern: str or compiled regular expression.
        @param replacement: see L{re} for all possibilities.
        @param args: files, last one is destination.
        @type args: str
        @keyword a: if True all occurrences of pattern in a line are replaced
            (default is only the first). NOTE(review): the code reads the key
            'a', presumably the short form of an 'all' option; confirm
            against the option handling.
        @keyword c: number of occurrences in a line to replace (default only
            the first). NOTE(review): read as key 'c', presumably short for
            'count'; confirm.
        @keyword ignore: if True all errors are ignored.
        @raise TypeError: if both 'a' (true) and 'c' are given.
        @return: nothing.
        """
        regex=re.compile(pattern)
        self.count=1

        def replace_all(line):
            return regex.sub(replacement,line)

        def replace_some(line):
            # self.count is looked up on every call, not frozen here
            return regex.sub(replacement,line,self.count)

        if not kw.get('a',False):
            if 'c' in kw:
                self.count=kw['c']
            self.process=replace_some
        else:
            if 'c' in kw:
                raise TypeError('a and c options are mutally exclusive')
            self.process=replace_all

        kw['filter']=self.process
        for base in (pype,stacker,inputfilterbuffer):
            base.__init__(self,*args,**kw)
1350
def call(*args,**kw):
    """
    Start a subprocess through the shell. Its output is not captured
    and the call does not wait for the subprocess to finish.

    The arguments go through C{expand} and the first one (the program)
    is normalised with C{os.path.normpath}.

    NOTE(review): a list combined with shell=True behaves differently on
    POSIX and Windows according to the subprocess documentation; confirm
    the intended platform.

    @param args: program followed by its arguments.
    @type args: str
    """

    argv=expand(args)
    program=os.path.normpath(argv[0])
    argv[0]=program

    subprocess.Popen(argv,shell=True)
1361
class callout(pype):
    """
    Call a subprocess with args and make its output available to a pipeline.

    Iterating over an instance yields the lines the subprocess writes
    to its standard output.
    """

    def __init__(self,*args,**kw):
        """
        Start the subprocess through the shell with its stdout piped.

        @param args: program followed by its arguments; they go through
            C{expand} and the program path is normalised.
        @type args: str
        """
        argv=expand(args)
        argv[0]=os.path.normpath(argv[0])
        pype.__init__(self,*argv,**kw)
        child=subprocess.Popen(argv,shell=True,stdout=subprocess.PIPE)
        # only the output stream is kept, not the Popen object itself
        self.p=child.stdout

    def __iter__(self):
        """Yield the subprocess output line by line."""
        stream=self.p
        for line in stream:
            yield line
1377
class callpipe(pype):
    """
    Call a subprocess with args and make its input and output available to a pipeline.

    Iterating over an instance yields the lines the subprocess writes to
    its standard output; C{write()} feeds data to its standard input.
    """

    def __init__(self,*args,**kw):
        """
        Start the subprocess through the shell with stdin and stdout piped.

        @param args: program followed by its arguments; they go through
            C{expand} and the program path is normalised.
        @type args: str
        """
        args=expand(args)
        args[0]=os.path.normpath(args[0])
        pype.__init__(self,*args,**kw)
        # keep the Popen object itself: both of its pipe ends are needed
        p=subprocess.Popen(args,shell=True,stdin=subprocess.PIPE,stdout=subprocess.PIPE)
        self.out=p.stdout
        self.inp=p.stdin

    def __iter__(self):
        """Yield the subprocess output line by line."""
        for line in self.out:
            yield line

    def write(self,data):
        """
        Write data to the standard input of the subprocess.

        @param data: the data to pass on, written as is.
        """
        self.inp.write(data)
if __name__ == '__main__':
    # Smoke test, run when the module is executed as a script. The calls
    # build on each other's files, so their order matters. It works in
    # ./testdir1 and ./testdir2 and the last step needs network access.

    # start from a clean slate; errors (e.g. directories missing) are ignored
    rm('testdir1','testdir2',recursive=True,ignore=True)
    mkdir('testdir1','testdir2');
    mkdir('testdir1/dir2/dir3',recursive=True)
    touch('testdir1/filea','testdir1/fileb','testdir1/filec')

    # plain copy, update copy with a wildcard, recursive update copy
    cp('testdir1/filea','testdir2')
    cp('testdir1/file*','testdir2',update=True)
    cp('testdir1','testdir2',update=True,recursive=True)

    # pipeline syntax: write two small files, then concatenate them
    echo('a\n','$b=40+2\n','c\n') > outfile('testdir1/abc')
    echo('a\n','$b\n','c\n') > outfile('testdir1/abc2')
    cat('testdir1/abc*') > outfile('testdir1/cat.cat')

    # variable substitution over the concatenated file, then grep and replace on the result
    substitute(glob('testdir1/cat*'),'testdir1/abc.sub')
    infile('testdir1/abc.sub') | grep('4') > outfile('testdir1/abc.grep')
    replace(r'(\d)',r'#\1#','testdir1/abc.sub','testdir1/abc.replace')

    # fetch a url into a local file
    wget('http://www.nu.nl','testdir1/check.html')