# Show usage:
# python pyIPINE.py -h
# Show example workflow:
# python pyIPINE.py -e
#
# The script runs unchanged on Python 2.7 and Python 3: every print is a
# single-argument call and the urllib/cookie modules are imported through a
# small compatibility shim below.

license = """
* Python script for submitting and retrieving an I-PINE job

Copyright (C) 2018-present NMRFAM
National Magnetic Resonance Facility, University of Wisconsin

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Contact: NMRFAM, nputmanbuehl@wisc.edu

CITATION:
    Woonghee Lee, Arash Bahrami, Hesam Dashti, Hamid R. Eghbalnia,
    Marco Tonelli, William M. Westler, John L. Markley.
    (2019) Journal of Biomolecular NMR. 73 (5): 213-222.
"""

ipine_control_file_template = """
#######################################################
## Control file template for pyIPINE.py
## by NMRFAM (pinesparky@biochem.wisc.edu)
## Updated Jan. 21, 2019
#######################################################
## User information
#######################################################
# Optional.
user_name
e_mail
#######################################################
# job_id can be set as random.
# if a user want to use their own, the format should be
# ######_######_###### ( #: 0 - 9 )
# e.g. 181203_161614_123145
#######################################################
job_id random
#######################################################
# Optional columns
#pi_name
#institute
#######################################################
# Protein sequence file. Mandatory
sequence myseq.txt
#######################################################
## peak lists
#######################################################
## 2D experiments
#######################################################
#N15-HSQC
#C13-HSQC
#######################################################
## 3D backbone experiments
#######################################################
#HNCO
#CBCA(CO)NH
#HNCACB
#HN(CO)CACB
#HNCA
#HN(CO)CA
#HN(CA)CO
#CB(CA)(CO)NH
#HN(CA)CB
#HN(CA)NNH
#######################################################
## 3D side chain experiments
#######################################################
#C(CO)NH
#H(CCO)NH
#HBHA(CO)NH
#HA(CO)NH
#HCCH-TOCSY
#CCH-TOCSY
#HCCH-COSY
#CCH-COSY
#TOCSY-HSQC
#HNHA
#HBHANH
#######################################################
## 3D NOESY experiments
#######################################################
#N15-NOESY
#C13-NOESY
#######################################################
## 4D backbone experiments
#######################################################
#HNCOCA-4D
#HNCACO-4D
#HNCO(-1)CA-4D
#simHNCOCA-4D
#######################################################
## Optional inputs
#######################################################
#prot_file
#assign_file
#selective_label
#coordinate_file
#spin_system
#cs_rosetta no
#######################################################"""

logo = """
* pyIPINE.py by Woonghee Lee, Ph.D.
* Python script for submitting and retrieving an I-PINE job
* Last updated: August 13, 2019
* CITATION:
    Woonghee Lee, Arash Bahrami, Hesam Dashti, Hamid R. Eghbalnia,
    Marco Tonelli, William M. Westler, John L. Markley.
    (2019) Journal of Biomolecular NMR. 73 (5): 213-222.

Parameters:
    --in=      [control_file.txt]   Read a control file
    --out=     [control_file.txt]   Write an output file
    --job_id=  [JOB ID]             Job ID
    --bmrb=    [BMRB ID]            BioMagResBank Entry ID or BMRB file

Operations:
    --submit        Submit a job to I-PINE
    --download      Download a job from I-PINE
    --web           Open a web report in a web browser
    --template      Template of a control file
    --test          Make an assign file from BMRB entry
    --example, -e   Show a simple example workflow
    --help, -h      Print this message

Usage:
    a) submit a job
        python pyIPINE.py --in=control_file.txt --submit
    b) download a job
        python pyIPINE.py --in=control_file.txt --download
        OR
        python pyIPINE.py --job_id=[JOB ID] --download
    c) open a web report in a browser
        python pyIPINE.py --in=control_file.txt --web
        OR
        python pyIPINE.py --job_id=[JOB ID] --web
    d) print out a control template file
        python pyIPINE.py --template
    e) write a control template file
        python pyIPINE.py --out=control_file.txt --template
    f) write a test assignment file from a BMRB entry
        python pyIPINE.py --bmrb=[BMRB ID] --out=assign_file.str --test
"""

example = """
* pyIPINE.py by Woonghee Lee, Ph.D.
* Python script for submitting and retrieving an I-PINE job
* Last updated: August 13, 2019

Example:

    # Fetch BMRB 6457 and make an assignment file to run
    python pyIPINE.py --bmrb=6457 --out=assign_file.str --test

    # Make a template control file
    python pyIPINE.py --out=control_file.txt --template

    # Set user name and an email to receive results (optional)
    # Note that a text editor can be used instead of 'sed' tool.
    sed -i -e 's/user_name/user_name Woonghee/g' control_file.txt
    sed -i -e 's/e_mail/e_mail pinesparky@biochem.wisc.edu/g' control_file.txt

    # Set the assignment file made from BMRB 6457
    # Note that a text editor can be used instead of 'sed' tool.
    sed -i -e 's/sequence myseq.txt/sequence assign_file.str.seq/g' control_file.txt
    sed -i -e 's/#assign_file/assign_file assign_file.str/g' control_file.txt

    # Submit a job. A random ID will be reported unless set in the control file.
    python pyIPINE.py --in=control_file.txt --submit

    # View the results in the web browser
    python pyIPINE.py --job_id=######_######_###### --web

    # Download the results with a job ID
    python pyIPINE.py --job_id=######_######_###### --download
"""

# The three tuples below are parallel: position k of pine_types is uploaded in
# the file slot pine_field[k] and announced through form field pine_field2[k].
pine_types = ('N15-HSQC', 'C13-HSQC', 'HN(CA)CO', 'HNCO', 'CBCA(CO)NH',
              'HN(CO)CACB', 'HNCACB', 'HN(CO)CA', 'HNCA', 'CB(CA)(CO)NH',
              'HN(CA)CB', 'C(CO)NH', 'H(CCO)NH', 'HBHA(CO)NH', 'HA(CO)NH',
              'HCCH-TOCSY', 'CCH-TOCSY', 'HCCH-COSY', 'CCH-COSY',
              'TOCSY-HSQC', 'HNHA', 'HBHANH', 'HN(CA)NNH', 'N15-NOESY',
              'C13-NOESY', 'HNCOCA-4D', 'HNCACO-4D', 'HNCO(-1)CA-4D',
              'simHNCOCA-4D')
pine_field = ('file01', 'file15', 'file08', 'file02', 'file03', 'file05',
              'file04', 'file07', 'file06', 'file09', 'file10', 'file11',
              'file12', 'file13', 'file14', 'file16', 'file17', 'file18',
              'file19', 'file20', 'file25', 'file21', 'file22', 'file23',
              'file24', 'file26', 'file27', 'file28', 'file29')
pine_field2 = ('exp1', 'exp15', 'exp8', 'exp2', 'exp3', 'exp5', 'exp4',
               'exp7', 'exp6', 'exp9', 'exp10', 'exp11', 'exp12', 'exp13',
               'exp14', 'exp16', 'exp17', 'exp18', 'exp19', 'exp20', 'exp25',
               'exp21', 'exp22', 'exp23', 'exp24', 'exp26', 'exp27', 'exp28',
               'exp29')

# Three-letter to one-letter amino acid codes.
AAA_dict = {'CYS': 'C', 'ASP': 'D', 'SER': 'S', 'GLN': 'Q', 'LYS': 'K',
            'ILE': 'I', 'PRO': 'P', 'THR': 'T', 'PHE': 'F', 'ASN': 'N',
            'GLY': 'G', 'HIS': 'H', 'LEU': 'L', 'ARG': 'R', 'TRP': 'W',
            'ALA': 'A', 'VAL': 'V', 'GLU': 'E', 'TYR': 'Y', 'MET': 'M'}

import getopt
import mimetypes
import os
import random
import sys
import time
import uuid
import webbrowser

try:  # Python 2 module names (httplib/mimetools kept from the original list)
    import httplib
    import mimetools
    import cookielib
    import urllib2
    from urllib2 import HTTPError, URLError
except ImportError:  # Python 3 equivalents, bound to the same names
    import http.client as httplib
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.error import HTTPError, URLError

# Install a cookie-aware opener so every urlopen() call shares one cookie jar.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

# Control-file keyword -> (upload slot, form field name, form field value).
# The template advertises 'selective_label' and 'coordinate_file' while the
# original parser only knew 'label_file' and 'pdb_file'; both are accepted.
_OPTIONAL_UPLOADS = {
    'prot_file': ('prot_file', 'prot_file', 'prot_file'),
    'assign_file': ('pre_assign', 'assign_file', 'Pre_assignment'),
    'label_file': ('labeling', 'label_file', 'Selective_Labeling'),
    'selective_label': ('labeling', 'label_file', 'Selective_Labeling'),
    'pdb_file': ('pdb', 'pdb_file', 'PDB coordinate'),
    'coordinate_file': ('pdb', 'pdb_file', 'PDB coordinate'),
    'spin_system': ('spin_system', 'spin_system', 'Spin system'),
}
_ROSETTA_KEYWORDS = ('csrosetta', 'cs_rosetta')

# Upper bound (exclusive) on the relative residue index kept by parse_bmrb.
_MAX_RESIDUES = 1000


def _to_bytes(data):
    """Return `data` as bytes, UTF-8 encoding it when it is text."""
    if isinstance(data, bytes):
        return data
    return data.encode('utf-8')


def _to_text(data):
    """Return `data` as a native str, decoding bytes as UTF-8 if needed."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def post_multipart(host, selector, fields, files):
    """
    Post fields and files to an http host as multipart/form-data.
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return the server's response page (raw, as read from the connection).
    """
    content_type, body = encode_multipart_formdata(fields, files)
    headers = {'Content-Type': content_type,
               'Content-Length': str(len(body))}
    r = urllib2.Request("http://%s%s" % (host, selector), body, headers)
    return urllib2.urlopen(r).read()


def encode_multipart_formdata(fields, files):
    """
    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return (content_type, body); body is a byte string ready to be sent.
    """
    # mimetools.choose_boundary() does not exist on Python 3; a random hex
    # token is an equally valid multipart boundary.
    boundary = uuid.uuid4().hex
    parts = []
    for (key, value) in fields:
        parts.append('--' + boundary)
        parts.append('Content-Disposition: form-data; name="%s"' % key)
        parts.append('')
        parts.append(value)
    for (key, filename, value) in files:
        parts.append('--' + boundary)
        parts.append('Content-Disposition: form-data; name="%s"; '
                     'filename="%s"' % (key, filename))
        parts.append('Content-Type: %s' % get_content_type(filename))
        parts.append('')
        parts.append(value)
    parts.append('--' + boundary + '--')
    parts.append('')
    # Join as bytes: urllib on Python 3 refuses a text request body.
    body = b'\r\n'.join(_to_bytes(part) for part in parts)
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return content_type, body


def get_content_type(filename):
    """Guess the MIME type from the file name, defaulting to octet-stream."""
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'


def help():
    """Print the usage text and exit."""
    print(logo)
    sys.exit()


def exmp():
    """Print the example workflow and exit."""
    print(example)
    sys.exit()


def print_template():
    """Print the control file template to standard output."""
    print(ipine_control_file_template)


def save_template(out_path):
    """Write the control file template to `out_path`."""
    with open(out_path, 'w') as f:
        f.write(ipine_control_file_template)


def check_job_id(job_id):
    """
    Return True when job_id is 'random' (any case) or has the form
    ######_######_###### with every # a digit 0-9; otherwise False.
    """
    segments = job_id.split('_')
    if len(segments) == 1:
        # The original fell through to the three-segment test here and so
        # could never accept 'random'.
        return segments[0].lower() == 'random'
    if len(segments) != 3:
        return False
    for segment in segments:
        if len(segment) != 6:
            return False
        if not all(ch in '0123456789' for ch in segment):
            return False
    return True


def get_arguments(opts, opt):
    """Return (True, value) for option `opt` in getopt pairs, else (False, None)."""
    for o, a in opts:
        if opt == o:
            return True, a
    return False, None


def parse_arguments():
    """Parse sys.argv with getopt; print usage and exit(2) on a bad option."""
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'he',
            ['in=', 'out=', 'job_id=', 'bmrb=', 'submit', 'download',
             'web', 'template', 'test', 'example', 'help'])
    except getopt.GetoptError as err:
        print(str(err))
        print(logo)
        sys.exit(2)
    return opts, args


def check_server_status():
    """
    Probe the I-PINE submission page.
    Return (True, True) when reachable, else (False, error code or args).
    """
    try:
        urllib2.urlopen('http://i-pine.nmrfam.wisc.edu/pine3_submit.html')
    except HTTPError as e:
        print(e.code)
        return (False, e.code)
    except URLError as e:
        print(e.args)
        return (False, e.args)
    return (True, True)


# check submitted job
def check_submitted_job_status(job_id):
    """
    Fetch token_status.txt for `job_id`.
    Return (True, status_text) when the text contains 'succ' or 'finished',
    (False, status_text) otherwise, and (False, 'not found') when the status
    file cannot be fetched.
    """
    job_url = ('http://zavot.nmrfam.wisc.edu/upload/script/users/'
               + job_id + '/token_status.txt')
    try:
        response = urllib2.urlopen(job_url, timeout=15)
        job_status = _to_text(response.read())
    except Exception:
        # Deliberately broad: any transport failure means "no result yet".
        print(job_id + ' results cannot be found.')
        print('It can be either way. Job ID is incorrect or the job is '
              'still running.')
        return (False, 'not found')
    if 'succ' in job_status or 'finished' in job_status:
        return (True, job_status)
    return (False, job_status)


def get_file_content(file_path):
    """Return the text of `file_path`, or '' when missing or unreadable."""
    if not os.path.exists(file_path):
        print(file_path + ' not found.')
        return ''
    try:
        with open(file_path, 'r') as f:
            return f.read()
    except (IOError, OSError, UnicodeError):
        return ''


def _active_lines(control_file):
    """Return the stripped, non-blank, non-comment lines of a control file."""
    with open(control_file, 'r') as f:
        stripped = [line.strip() for line in f]
    return [line for line in stripped if line and not line.startswith('#')]


def _keyword_value(line, keyword):
    """Return the text following `keyword` when it is the line's first token.

    The keyword match is case-insensitive; '' is returned when the line is
    for another keyword or carries no value.
    """
    if line.split()[0].lower() != keyword:
        return ''
    return line[len(keyword):].strip()


def _option_path(tokens):
    """Return the value token of a control-file line, exiting if absent."""
    if len(tokens) < 2:
        print('No value given for ' + tokens[0] + ' in the control file.')
        sys.exit()
    return tokens[1]


def _require_file_content(path, label):
    """Return the content of `path`; print a hint naming `label` and exit if empty."""
    content = get_file_content(path)
    if content == '':
        print('Check your ' + label + ' file.')
        sys.exit()
    return content


def get_job_id(control_file):
    """
    Return the job_id written in `control_file` ('' when none is set).
    Return False when the control file does not exist.
    """
    if not os.path.exists(control_file):
        print(control_file + ' not exists')
        return False
    job_id = ''
    for line in _active_lines(control_file):
        value = _keyword_value(line, 'job_id')
        if value:
            job_id = value  # the last job_id line wins
    return job_id


def job_submit(control_file):
    """
    Read `control_file`, upload the sequence, peak lists and optional inputs
    to I-PINE and print the job ID with follow-up commands.
    Return False when the control file is missing; exit the process when a
    mandatory column or a referenced file is missing.
    """
    if not os.path.exists(control_file):
        print(control_file + ' not exists')
        return False
    lines = _active_lines(control_file)

    # process columns
    # mandatory columns: job_id, sequence (user_name and e_mail have defaults)
    settings = dict.fromkeys(
        ('user_name', 'e_mail', 'job_id', 'sequence', 'pi_name',
         'institute'), '')
    for line in lines:
        for key in settings:
            value = _keyword_value(line, key)
            if value:
                # The original stored 'institute' into the sequence path,
                # which broke the upload whenever an institute was given.
                settings[key] = value
    # NOTE(review): pi_name and institute are parsed but, as in the original,
    # not sent; the matching form field names are not visible in this file.
    user_name = settings['user_name'] or 'Anonymous'
    e_mail = settings['e_mail'] or 'pinesparky@biochem.wisc.edu'
    job_id = settings['job_id']
    sequence = settings['sequence']
    if job_id == '' or sequence == '':
        print('Mandatory columns are not filled in the control file.')
        sys.exit()

    fields = [['name', user_name], ['email', e_mail]]
    seq_content = _require_file_content(sequence, 'sequence')
    contents = [['seqfile', os.path.split(sequence)[-1], seq_content]]
    fields.append(['seq', 'sequence'])

    # process job_id
    if job_id.lower() == 'random':
        random.seed()
        job_id = time.strftime("%y%m%d_%H%M%S_") + \
            "%06d" % random.randrange(0, 1000000)
    fields.append(['nmrfamsparky_token', job_id])

    # process experiment columns
    for line in lines:
        tokens = line.split()
        if tokens[0] not in pine_types:
            continue
        pine_idx = pine_types.index(tokens[0])
        path = _option_path(tokens)
        content = _require_file_content(path, path)
        contents.append([pine_field[pine_idx], os.path.split(path)[-1],
                         content])
        fields.append([pine_field2[pine_idx], pine_types[pine_idx]])

    # process optional columns
    for line in lines:
        tokens = line.split()
        keyword = tokens[0]
        if keyword in _OPTIONAL_UPLOADS:
            slot, field_name, field_value = _OPTIONAL_UPLOADS[keyword]
            path = _option_path(tokens)
            content = _require_file_content(path, path)
            contents.append([slot, os.path.split(path)[-1], content])
            fields.append([field_name, field_value])
        elif keyword in _ROSETTA_KEYWORDS:
            if len(tokens) > 1 and tokens[1].lower() == 'yes':
                fields.append(['bmrb_rosetta', 'bmrb_rosetta'])

    ipine_url = 'pine3.nmrfam.wisc.edu'
    ipine_selector = '/cgi-bin/pine3uploader.cgi'
    post_multipart(ipine_url, ipine_selector, fields, contents)
    print('Submitted...\n')
    print('Job ID: %s\n' % (job_id))
    print('Download results like this:')
    print(' python pyIPINE.py --job_id=%s --download\n' % (job_id))
    print('View web results like this:')
    print(' python pyIPINE.py --job_id=%s --web\n' % (job_id))


def make_preassign(parsed_bmrb):
    """
    Build the sequence text and the pre-assignment loop from parse_bmrb output.
    parsed_bmrb is a list indexed by residue number whose non-empty entries
    are [seqidx, three_letter_code, one_letter_code, [[atom, shift], ...]].
    Return (seq_content, preassign_content).
    """
    preassign_header = """loop_
    _Atom_shift_assign_ID
    _Residue_seq_code
    _Residue_label
    _Atom_name
    _Atom_type
    _Chem_shift_value
    _Chem_shift_value_error
    _Chem_shift_ambiguity_code

"""
    seq_lines = []
    cs_lines = []
    assign_id = 1
    for residue_no, residue in enumerate(parsed_bmrb):
        if len(residue) == 0:
            continue
        residue_code = residue[1]
        seq_lines.append(residue_code + '\n')
        for atom, shift in residue[3]:
            # protons get a tighter shift tolerance than heavy atoms
            err = 0.02 if atom[0] == 'H' else 0.2
            cs_lines.append(
                ' %4d %4d %4s %4s %4s %7.3f %7.3f 0 \n'
                % (assign_id, residue_no, residue_code, atom, atom[0],
                   shift, err))
            # The original never advanced this counter, so every row was
            # written with assignment ID 1.
            assign_id += 1
    seq_content = ''.join(seq_lines)
    preassign_content = ''.join(cs_lines)
    return seq_content, \
        preassign_header + preassign_content + '\nstop_\nsave_\n'


def parse_bmrb(bmrb_id):
    """
    Parse the sequence and chemical shifts of an NMR-STAR 3 entry.
    bmrb_id is either a path to a local file or a BMRB entry ID to download.
    Return (True, seq_list) where seq_list[k] is [] or
    [seqidx, seqcode, seqcodeA, [[atom, cs], ...]] for relative residue k,
    or (False, [message]) on failure.
    """
    # check if this file exists
    if os.path.exists(bmrb_id):
        with open(bmrb_id, 'r') as f:
            bmrblines = f.readlines()
    else:
        # download from web
        bmrb_url = 'http://rest.bmrb.wisc.edu/bmrb/NMR-STAR3/' + bmrb_id
        try:
            bmrb_data = urllib2.urlopen(bmrb_url, timeout=2)
            bmrblines = [_to_text(raw) for raw in bmrb_data.readlines()]
        except Exception:
            # Deliberately broad: any transport failure is reported the same.
            print('BMRB entry could not be downloaded.')
            return (False, ['BMRB entry could not be downloaded.'])
    rows = [line.strip() for line in bmrblines]

    # Locate the sequence loop; iCompID is the column offset of Comp_ID
    # relative to the ID tag.
    iStart = -1
    iCompID = -1
    for i, row in enumerate(rows):
        if row != '_Entity_comp_index.ID':
            continue
        iStart = i
        for j in range(i, len(rows)):
            if rows[j] == '_Entity_comp_index.Comp_ID':
                iCompID = j - i
                break
        break
    if iStart == -1 or iCompID == -1:
        return (False, ['Sequence loop could not be found.'])

    seq_list = [[] for _ in range(_MAX_RESIDUES)]
    iFirst = None  # sequence number of the first row; indices become 1-based
    for row in rows[iStart + iCompID:]:
        slist = row.split()
        if len(slist) == 0:
            continue
        if slist[0] == 'stop_' or slist[0] == '_stop':
            break
        if len(slist) < 3:
            continue  # remaining tag lines of the loop header
        try:
            seqidx = int(slist[0])
        except ValueError:
            continue
        if iFirst is None:
            iFirst = seqidx
        seqidx = seqidx - iFirst + 1
        if iCompID >= len(slist) or slist[iCompID] not in AAA_dict:
            continue  # short row or non-standard residue
        if not 0 < seqidx < len(seq_list):
            continue
        seqcode = slist[iCompID]
        seq_list[seqidx] = [seqidx, seqcode, AAA_dict[seqcode], []]
    if iFirst is None:
        return (False, ['Sequence loop could not be found.'])

    # now start parsing the chemical shift loop
    iStart = iSeqID = iCompID = iAtomID = iVal = -1
    for i, row in enumerate(rows):
        splitted = row.split('.')
        if len(splitted) < 2 or splitted[0] != '_Atom_chem_shift':
            continue
        # chemical shift starts: read tag names until the first non-tag line
        for j in range(i, len(rows)):
            splitted2 = rows[j].split('.')
            if len(splitted2) < 2:  # done reading fields
                iStart = j + 1
                break
            if splitted2[1] == 'Seq_ID':
                iSeqID = j - i
            elif splitted2[1] == 'Comp_ID':
                iCompID = j - i  # AA
            elif splitted2[1] == 'Atom_ID':
                iAtomID = j - i
            elif splitted2[1] == 'Val':
                iVal = j - i
        if -1 in (iStart, iSeqID, iCompID, iAtomID, iVal):
            # The original printed this tuple and carried on with -1 columns.
            return (False, ['_Atom_chem_shift does not seem right.'])
        for data_row in rows[iStart:]:
            slist = data_row.split()
            if len(slist) == 0:
                continue
            if slist[0] == 'stop_' or slist[0] == '_stop':
                break
            if len(slist) < 3:
                continue
            try:
                iSeqIdx = int(slist[iSeqID]) - iFirst + 1
                AAA_dict[slist[iCompID]]  # lookup only filters residues
                szAtom = slist[iAtomID]
                dCS = float(slist[iVal])
            except (ValueError, IndexError, KeyError):
                continue
            # Skip shifts of residues the sequence loop did not provide.
            if not 0 < iSeqIdx < len(seq_list) or not seq_list[iSeqIdx]:
                continue
            seq_list[iSeqIdx][3].append([szAtom, dCS])
        break  # done reading
    return (True, seq_list)  # seqidx, seqcode, seqcodeA, [atom, cs]


def _resolve_finished_job_id(opts):
    """Return the job ID from --job_id or --in once its results exist.

    Prints the reason and exits when no usable ID is available or when the
    server does not report the job as finished.
    """
    bArg_job_id, arg_job_id = get_arguments(opts, '--job_id')
    bArg_in, arg_in = get_arguments(opts, '--in')
    if bArg_job_id:
        job_id = arg_job_id
    elif bArg_in:
        job_id = get_job_id(arg_in)
        if job_id == 'random':
            print('Job ID was randomly generated. '
                  'Please use --job_id parameter.')
            sys.exit()
        if not job_id:  # '' (not set) or False (control file missing)
            print('Job ID could not be isolated. '
                  'Please use --job_id parameter.')
            sys.exit()
    else:
        print('Neither --job_id nor --in was used.')
        sys.exit()
    bTemp, job_status = check_submitted_job_status(job_id)
    if not bTemp:
        print(job_status)
        sys.exit()
    return job_id


## main code
def main():
    """Dispatch the command-line operation selected by the user."""
    opts, args = parse_arguments()
    bArg_in, arg_in = get_arguments(opts, '--in')
    bArg_out, arg_out = get_arguments(opts, '--out')
    bArg_bmrb, arg_bmrb = get_arguments(opts, '--bmrb')
    bArg_submit = get_arguments(opts, '--submit')[0]
    bArg_download = get_arguments(opts, '--download')[0]
    bArg_web = get_arguments(opts, '--web')[0]
    bArg_template = get_arguments(opts, '--template')[0]
    bArg_test = get_arguments(opts, '--test')[0]
    bArg_help = (get_arguments(opts, '--help')[0]
                 or get_arguments(opts, '-h')[0])
    bArg_example = (get_arguments(opts, '--example')[0]
                    or get_arguments(opts, '-e')[0])

    if bArg_help or len(opts) == 0:
        help()
    if bArg_example:
        exmp()

    if bArg_test:
        if not bArg_bmrb:
            print('--bmrb not set')
            sys.exit()
        bTemp, bmrb_list = parse_bmrb(arg_bmrb)
        if not bTemp:
            print('Reading BMRB failed: ' + bmrb_list[0])
            sys.exit()
        seq_content, preassign_content = make_preassign(bmrb_list)
        if not bArg_out:
            print(seq_content)
            print(preassign_content)
            sys.exit()
        with open(arg_out, 'w') as f:
            f.write(preassign_content)
        # sequence file will also be made
        with open(arg_out + '.seq', 'w') as f:
            f.write(seq_content)
        sys.exit()

    if bArg_template:
        if not bArg_out:
            print_template()
            sys.exit()
        save_template(arg_out)
        sys.exit()

    if bArg_web:
        job_id = _resolve_finished_job_id(opts)
        jobUrl = ('http://zavot.nmrfam.wisc.edu/upload/script/users/'
                  + job_id + '/web_results')
        webbrowser.open(jobUrl)
        sys.exit()

    if bArg_download:
        job_id = _resolve_finished_job_id(opts)
        # The archive is fetched by handing its URL to the web browser.
        jobUrl = ('http://zavot.nmrfam.wisc.edu/upload/script/users/'
                  + job_id + '/results/all_results.zip')
        webbrowser.open(jobUrl)
        sys.exit()

    if bArg_submit:
        if not bArg_in:
            print('--in not set')
            sys.exit()
        bStatus, server_status = check_server_status()
        if not bStatus:
            sys.exit()
        job_submit(arg_in)


if __name__ == "__main__":
    main()