Ad

repairs your list cur (current) with the knowledge from the list pre (previous).

print(repair(['line 1', '???? 2', 'line 3'], ['line 1', 'line 2']))
# ['line 1', 'line 2', 'line 3']

Previous kumite solition and details formalization was good.
For increase hard of solution I add additional hard test case of:

  • the number of broken fragment can be more that 1.
  • the length of broken fragment can be same, in this case should tetect nearest by position
  • sametime has bad input data - impossible length of repaired text - do not repair it.

Same bugfix. Now, the code has more work.

Code
Diff
  • # This solution not vork correctly. Try improvement this code.
    
    CONST_UNDEFINED = -999999 # or None
    CONST_MAX_VALUE = 999999
    class RepairEngine:
        searchWindow1 = 70 # when diff < DIFF_TRESHOLD
        searchWindow2 = 14 # when diff < 3
        DIFF_TRESHOLD_1=68
        DIFF_TRESHOLD_2=3
        def __str__(self):
            return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
        
        def indexing(self, historySeq):
            if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
            for text in historySeq: #markBrokenLines
                for l in text:
                    l['hasBroken'] = self.checkBrokenLine(l['line'])
            for i in range(1,len(historySeq)):
                self.linkFiles(historySeq[i-1], historySeq[i])
        #end of indexing
        
        def linkFiles(self, lines1, lines2):
            p2=0
            seqenceLost=0
            for i in range(0, len(lines1)):
                #print(f"Progress {str(i+1)}/{len(lines1)} history items")
                myLine=lines1[i]
                p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
                #print('p=',p,' thisK=',thisK,'  i=',i,'  text:', myLine['line'])
                if ((p!=CONST_UNDEFINED) and (p>=0)):
                    if (p2!=p):
                        #print(" new offset {str(i)} > {str(p)}")
                        p2=p
                    myLine['nextPos']=p
                    applyPatch = True
                    if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
                        if lines2[p]['preK'] > thisK:
                            None
                        elif (lines2[p]['preK'] < thisK):
                            applyPatch = False
                        else:
                            None
                    if applyPatch:
                        lines2[p]['prePos'] = i
                        lines2[p]['preK'] = thisK
                        if i != myLine['pos']:
                            print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
                    if seqenceLost>self.searchWindow1*3:
                        print(f"i={str(i)}, found p={str(p)} after lost search window");
                    seqenceLost=0
                else:
                    #print("i={str(i)} pos={str(p)} not found position")
                    seqenceLost +=1
                    if seqenceLost==self.searchWindow1*3:
                        print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
                p2 +=1
        # end of linkFiles
        
        # TODO hard method. Need more unit test!    
        def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
            result=CONST_UNDEFINED
            resultD=CONST_MAX_VALUE
            for fromPos in [nearPos1,nearPos2] :
                if (fromPos==CONST_UNDEFINED): continue
                if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
                for d in range(0,self.searchWindow1):
                    # todo mark previous interval and do not check against lines.
                    p=fromPos+d
                    if (0<= p) and (p <len(inLines)):
                        k = self.stringDiff(text, inLines[p]['line']);
                        if (k<=self.DIFF_TRESHOLD_1*3):
                            k+=min(20,d/40)
                            k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                        #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ')
                        if (resultD>k):
                            result, resultD = p, k
                    if (d!=0):
                        p=fromPos-d
                        if (0<= p) and (p <len(inLines)):
                            k = self.stringDiff(text, inLines[p]['line'])
                            if (k<=self.DIFF_TRESHOLD_1*3):
                                k+=min(20,d/40)
                                k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                            if resultD>k :
                                result, resultD = p, k
                    if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
                        break # stop search - it is best.
            if (resultD>self.DIFF_TRESHOLD_1):
                return -1, None # not found
            return result, resultD
        # end of searchLine
        
        not_allowed="?*"
        def checkBrokenLine(self, line):
            wrongChar=0
            questionChar=0
            for c in line:
                if c in self.not_allowed: wrongChar+=1
                #if not c in self.allowed: wrongChar+=1
                if c=='?':
                    questionChar+=1
                    #if c in self.not_allowed: wrongChar-=1
            return wrongChar>0 or (len(line)>5 and questionChar>5)
        # end of checkBrokenLine
        
        # Soft comparsion of 2 string
        # return  0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
        def stringDiff(self, l1, l2):
            if (l1==l2): 
                #print('compare ',l1,l2," EQ=0")
                return 0
            if (l1.lower()==l2.lower()):
                #print('compare ',l1,l2," EQ=1")
                return 1
            l1=l1.replace("  ", " ").strip()
            l2=l2.replace("  ", " ").strip()
            #todo optimization: l1,l2 = l1.upper(),l2.upper()
            l1,l2=l1.upper(),l2.upper()
            if (len(l1)==len(l2)):
                eq=True
                for i in range(len(l1)):
                    eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed)
                if eq:
                    #print('compare ',l1,l2," EQ=1,2")
                    return 1
            
            #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна  (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662  Creative commons CC-BY-SA )
            def damerau_levenshtein_distance(s1, s2):
                d = {}
                lenstr1 = len(s1)
                lenstr2 = len(s2)
                for i in range(-1,lenstr1+1):
                    d[(i,-1)] = i+1
                for j in range(-1,lenstr2+1):
                    d[(-1,j)] = j+1
                for i in range(lenstr1):
                    for j in range(lenstr2):
                        if s1[i] == s2[j]:
                            cost = 0
                        elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
                            cost = 0
                        else:
                            cost = 3
                        d[(i,j)] = min(
                                       d[(i-1,j)] + 1, # deletion
                                       d[(i,j-1)] + 1, # insertion
                                       d[(i-1,j-1)] + cost, # substitution
                                      )
                        if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                                d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
                return d[lenstr1-1,lenstr2-1]
            
            xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
            
            if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
                #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE)
                return CONST_MAX_VALUE
            xDiff = xDiff *300 / len(l1)
            if (xDiff>CONST_MAX_VALUE-1000):
                #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100)
                return CONST_MAX_VALUE-100
            #print('compare ',l1,l2," EQ=",xDiff)
            return round(xDiff)
        # end of stringDiff
        
        def repair(self, historySeq, itm:int=-1):
            if (itm==0): raise BaseException("Can not repair first item[0] from future!")
            if (itm<0): itm=len(historySeq)-1
            #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
            lines=historySeq[itm]
            out=[]
            statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
            print(" repair lines:"+str(len(lines)))
            for line in lines:
                print(" repair line>"+repr(line)+"  pos="+str(line['pos'])+" test="+str(line))
                if (line['hasBroken']):
                    oldP=line['prePos']
                    depth=0
                    i=itm-1
                    while (oldP!=CONST_UNDEFINED and i>=0):
                        depth+=1
                        statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
                        oldL=historySeq[i][oldP]
                        if (not oldL['hasBroken']):
                            print(" repair line<use old source")
                            out.append(oldL['line'])
                            #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
                            #        itm, i, line.pos, oldP, line.line, oldL.line);
                            statRepair+=1
                            statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
                            break
                        #else:
                        #    log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
                        oldP=oldL['prePos']
                        i-=1
                    if (oldP==CONST_UNDEFINED) :
                        print(" repair line<not found. ", line['line'])
                        out.append(line['line']) #no modify
                        statNotFound+=1
                else:
                    print(" repair line<no need")
                    out.append(line['line'])
            print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
            print(" repair out lines:"+str(len(out)))
            return out
        # end of repair
        
        def process(self, historySeq, i_to_repair:int=-1):
            hSeq=[]
            for histText in historySeq:
                lines=[]
                i=0
                for l in histText:
                    tl={'pos':i, 'line':l, 'hasBroken':False,
        'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
                    lines.append(tl)
                    i+=1
                hSeq.append(lines)
            print("indexing...")
            idx=self.indexing(hSeq)
            print("pepair n="+str(i_to_repair))
            repair=self.repair(hSeq, i_to_repair)
            print("done")
            return repair
    
    def repair_text_by_history(history_of_text):
        repairer=RepairEngine()
        text=repairer.process(history_of_text, len(history_of_text)-1)
        return text
    • # This solution not vork correctly. Try improvement this code.
    • CONST_UNDEFINED = -999999 # or None
    • CONST_MAX_VALUE = 999999
    • class RepairEngine:
    • searchWindow1 = 70 # when diff < DIFF_TRESHOLD
    • searchWindow2 = 14 # when diff < 3
    • DIFF_TRESHOLD_1=68
    • DIFF_TRESHOLD_2=3
    • def __str__(self):
    • return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
    • def indexing(self, historySeq):
    • if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
    • for text in historySeq: #markBrokenLines
    • for l in text:
    • l['hasBroken'] = self.checkBrokenLine(l['line'])
    • for i in range(1,len(historySeq)):
    • self.linkFiles(historySeq[i-1], historySeq[i])
    • #end of indexing
    • def linkFiles(self, lines1, lines2):
    • p2=0
    • seqenceLost=0
    • for i in range(0, len(lines1)):
    • #print(f"Progress {str(i+1)}/{len(lines1)} history items")
    • myLine=lines1[i]
    • p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
    • #print('p=',p,' thisK=',thisK,' i=',i,' text:', myLine['line'])
    • if ((p!=CONST_UNDEFINED) and (p>=0)):
    • if (p2!=p):
    • #print(" new offset {str(i)} > {str(p)}")
    • p2=p
    • myLine['nextPos']=p
    • applyPatch = True
    • if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
    • if lines2[p]['preK'] > thisK:
    • None
    • elif (lines2[p]['preK'] < thisK):
    • applyPatch = False
    • else:
    • None
    • if applyPatch:
    • lines2[p]['prePos'] = i
    • lines2[p]['preK'] = thisK
    • if i != myLine['pos']:
    • print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
    • if seqenceLost>self.searchWindow1*3:
    • print(f"i={str(i)}, found p={str(p)} after lost search window");
    • seqenceLost=0
    • else:
    • #print("i={str(i)} pos={str(p)} not found position")
    • seqenceLost +=1
    • if seqenceLost==self.searchWindow1*3:
    • print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
    • p2 +=1
    • # end of linkFiles
    • # TODO hard method. Need more unit test!
    • def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
    • result=CONST_UNDEFINED
    • resultD=CONST_MAX_VALUE
    • for fromPos in [nearPos1,nearPos2] :
    • if (fromPos==CONST_UNDEFINED): continue
    • if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
    • for d in range(0,self.searchWindow1):
    • # todo mark previous interval and do not check against lines.
    • p=fromPos+d
    • if (0<= p) and (p <len(inLines)):
    • k = self.stringDiff(text, inLines[p]['line']);
    • if (k<=self.DIFF_TRESHOLD_1*3):
    • k+=min(20,d/40)
    • k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
    • #print('k=',k,' resultD=',resultD,'; t1="',text,'" t2="',inLines[p]['line'],'" ')
    • if (resultD>k):
    • result, resultD = p, k
    • if (d!=0):
    • p=fromPos-d
    • if (0<= p) and (p <len(inLines)):
    • k = self.stringDiff(text, inLines[p]['line'])
    • if (k<=self.DIFF_TRESHOLD_1*3):
    • k+=min(20,d/40)
    • k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
    • if resultD>k :
    • result, resultD = p, k
    • if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
    • break # stop search - it is best.
    • if (resultD>self.DIFF_TRESHOLD_1):
    • return -1, None # not found
    • return result, resultD
    • # end of searchLine
    • not_allowed="?*"
    • def checkBrokenLine(self, line):
    • wrongChar=0
    • questionChar=0
    • for c in line:
    • if c in self.not_allowed: wrongChar+=1
    • #if not c in self.allowed: wrongChar+=1
    • if c=='?':
    • questionChar+=1
    • #if c in self.not_allowed: wrongChar-=1
    • return wrongChar>0 or (len(line)>5 and questionChar>5)
    • # end of checkBrokenLine
    • # Soft comparsion of 2 string
    • # return 0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
    • def stringDiff(self, l1, l2):
    • if (l1==l2): return 0
    • if (l1.lower()==l2.lower()): return 1
    • if (l1==l2):
    • #print('compare ',l1,l2," EQ=0")
    • return 0
    • if (l1.lower()==l2.lower()):
    • #print('compare ',l1,l2," EQ=1")
    • return 1
    • l1=l1.replace(" ", " ").strip()
    • l2=l2.replace(" ", " ").strip()
    • #todo optimization: l1,l2 = l1.upper(),l2.upper()
    • l1,l2=l1.upper(),l2.upper()
    • if (len(l1)==len(l2)):
    • eq=True
    • for i in range(len(l1)):
    • eq=eq and (l1[i]==l2[i]) or (l1[i] in self.not_allowed) or (l1[i] in self.not_allowed)
    • if eq:
    • #print('compare ',l1,l2," EQ=1,2")
    • return 1
    • #Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
    • #Damerau–Levenshtein distance / Расстояние Дамерау — Левенштейна (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662 Creative commons CC-BY-SA )
    • def damerau_levenshtein_distance(s1, s2):
    • d = {}
    • lenstr1 = len(s1)
    • lenstr2 = len(s2)
    • for i in range(-1,lenstr1+1):
    • d[(i,-1)] = i+1
    • for j in range(-1,lenstr2+1):
    • d[(-1,j)] = j+1
    • for i in range(lenstr1):
    • for j in range(lenstr2):
    • if s1[i] == s2[j]:
    • cost = 0
    • if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
    • cost = 1
    • elif (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
    • cost = 0
    • else:
    • cost = 3
    • d[(i,j)] = min(
    • d[(i-1,j)] + 1, # deletion
    • d[(i,j-1)] + 1, # insertion
    • d[(i-1,j-1)] + cost, # substitution
    • )
    • if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
    • d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
    • return d[lenstr1-1,lenstr2-1]
    • xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
    • if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
    • return CONST_MAX_VALUE
    • #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE)
    • return CONST_MAX_VALUE
    • xDiff = xDiff *300 / len(l1)
    • if (xDiff>CONST_MAX_VALUE-1000):
    • #print('compare ',l1,l2," EQ=",CONST_MAX_VALUE-100)
    • return CONST_MAX_VALUE-100
    • #print('compare ',l1,l2," EQ=",xDiff)
    • return round(xDiff)
    • # end of stringDiff
    • def repair(self, historySeq, itm:int=-1):
    • if (itm==0): raise BaseException("Can not repair first item[0] from future!")
    • if (itm<0): itm=len(historySeq)-1
    • #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
    • lines=historySeq[itm]
    • out=[]
    • statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
    • print(" repair lines:"+str(len(lines)))
    • for line in lines:
    • print(" repair line>"+repr(line)+" pos="+str(line['pos'])+" test="+str(line))
    • if (line['hasBroken']):
    • oldP=line['prePos']
    • depth=0
    • i=itm-1
    • while (oldP!=CONST_UNDEFINED and i>=0):
    • depth+=1
    • statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
    • oldL=historySeq[i][oldP]
    • if (not oldL['hasBroken']):
    • print(" repair line<use old source")
    • out.append(oldL['line'])
    • #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
    • # itm, i, line.pos, oldP, line.line, oldL.line);
    • statRepair+=1
    • statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
    • break
    • #else:
    • # log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
    • oldP=oldL['prePos']
    • i-=1
    • if (oldP==CONST_UNDEFINED) :
    • print(" repair line<not found. ", line['line'])
    • out.append(line['line']) #no modify
    • statNotFound+=1
    • else:
    • print(" repair line<no need")
    • out.append(line['line'])
    • print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
    • print(" repair out lines:"+str(len(out)))
    • return out
    • # end of repair
    • def process(self, historySeq, i_to_repair:int=-1):
    • hSeq=[]
    • for histText in historySeq:
    • lines=[]
    • i=0
    • for l in histText:
    • tl={'pos':-1, 'line':l, 'hasBroken':False,
    • tl={'pos':i, 'line':l, 'hasBroken':False,
    • 'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
    • lines.append(tl)
    • i+=1
    • hSeq.append(lines)
    • print("indexing...")
    • idx=self.indexing(hSeq)
    • print("pepair n="+str(i_to_repair))
    • repair=self.repair(hSeq, i_to_repair)
    • print("done")
    • return repair
    • def repair_text_by_history(history_of_text):
    • repairer=RepairEngine()
    • text=repairer.process(history_of_text, len(history_of_text)-1)
    • return text

The old history text

You got damaged paper with text. And got previous edition of this text. Each next edition has any good changes and break character change. In broken lines will be same character replaced to '?'
Make solution for repair broken text by history.

##Input:

[
['line 1','line 2'], # first
['line 1','???? 2','line 3'] # second
]

As you can see, this data like Git changelog.

##Output:
Repair last text from input.

['line 1','line 2','line 3']
# This solution not vork correctly. Try improvement this code.

CONST_UNDEFINED = -999999 # or None
CONST_MAX_VALUE = 999999
class RepairEngine:
    searchWindow1 = 70 # when diff < DIFF_TRESHOLD
    searchWindow2 = 14 # when diff < 3
    DIFF_TRESHOLD_1=68
    DIFF_TRESHOLD_2=3
    def __str__(self):
        return f"RepairEngine(searchWindow1={str(self.searchWindow1)}, searchWindow2={str(self.searchWindow2)}, DIFF_TRESHOLD_1={str(self.DIFF_TRESHOLD_1)})"
    
    def indexing(self, historySeq):
        if len(historySeq)<2 : raise BaseException("History sequence should have 2 or more files. historySeq="+str(historySeq))
        for text in historySeq: #markBrokenLines
            for l in text:
                l['hasBroken'] = self.checkBrokenLine(l['line'])
        for i in range(1,len(historySeq)):
            self.linkFiles(historySeq[i-1], historySeq[i])
    #end of indexing
    
    def linkFiles(self, lines1, lines2):
        p2=0
        seqenceLost=0
        for i in range(0, len(lines1)):
            #print(f"Progress {str(i+1)}/{len(lines1)} history items")
            myLine=lines1[i]
            p,thisK =self.searchLine(myLine['line'], lines2, p2, i);
            if ((p!=CONST_UNDEFINED) and (p>=0)):
                if (p2!=p):
                    #print(" new offset {str(i)} > {str(p)}")
                    p2=p
                myLine['nextPos']=p
                applyPatch = True
                if (lines2[p]['prePos']!=CONST_UNDEFINED) and (lines2[p]['prePos']!=i):
                    if lines2[p]['preK'] > thisK:
                        None
                    elif (lines2[p]['preK'] < thisK):
                        applyPatch = False
                    else:
                        None
                if applyPatch:
                    lines2[p]['prePos'] = i
                    lines2[p]['preK'] = thisK
                    if i != myLine['pos']:
                        print(f"WARN: i={str(i)} and pos={str(myLine['pos'])} not match! At line \"{str(myLine['line'])}\"");
                if seqenceLost>self.searchWindow1*3:
                    print(f"i={str(i)}, found p={str(p)} after lost search window");
                seqenceLost=0
            else:
                #print("i={str(i)} pos={str(p)} not found position")
                seqenceLost +=1
                if seqenceLost==self.searchWindow1*3:
                    print(f"i={str(i)}, p={str(p)} lost search window, next position search may be not correct");
            p2 +=1
    # end of linkFiles
    
    # TODO hard method. Need more unit test!    
    def searchLine(self, text, inLines, nearPos1:int, nearPos2:int):
        result=CONST_UNDEFINED
        resultD=CONST_MAX_VALUE
        for fromPos in [nearPos1,nearPos2] :
            if (fromPos==CONST_UNDEFINED): continue
            if (result!=CONST_UNDEFINED and nearPos1==nearPos2): break
            for d in range(0,self.searchWindow1):
                # todo mark previous interval and do not check against lines.
                p=fromPos+d
                if (0<= p) and (p <len(inLines)):
                    k = self.stringDiff(text, inLines[p]['line']);
                    if (k<=self.DIFF_TRESHOLD_1*3):
                        k+=min(20,d/40)
                        k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                    if (resultD>k):
                        result, resultD = p, k
                if (d!=0):
                    p=fromPos-d
                    if (0<= p) and (p <len(inLines)):
                        k = self.stringDiff(text, inLines[p]['line'])
                        if (k<=self.DIFF_TRESHOLD_1*3):
                            k+=min(20,d/40)
                            k+=min(16,min(abs(p-nearPos1),abs(p-nearPos2))/50)
                        if resultD>k :
                            result, resultD = p, k
                if (resultD==0 or resultD<self.DIFF_TRESHOLD_2 and d>3 and d < self.searchWindow2):
                    break # stop search - it is best.
        if (resultD>self.DIFF_TRESHOLD_1):
            return -1, None # not found
        return result, resultD
    # end of searchLine
    
    not_allowed="?*"
    def checkBrokenLine(self, line):
        wrongChar=0
        questionChar=0
        for c in line:
            if c in self.not_allowed: wrongChar+=1
            #if not c in self.allowed: wrongChar+=1
            if c=='?':
                questionChar+=1
                #if c in self.not_allowed: wrongChar-=1
        return wrongChar>0 or (len(line)>5 and questionChar>5)
    # end of checkBrokenLine
    
    # Soft comparsion of 2 string
    # return  0 - equals, 2 equals ignore case and whitespace, 15 high diff but maybe same, Integer.MAX_VALUE absolutly different
    def stringDiff(self, l1, l2):
        if (l1==l2): return 0
        if (l1.lower()==l2.lower()): return 1
        l1=l1.replace("  ", " ").strip()
        l2=l2.replace("  ", " ").strip()
        #todo optimization: l1,l2 = l1.upper(),l2.upper()
        l1,l2=l1.upper(),l2.upper()
        
        #Расстояние Дамерау — Левенштейна  (https://ru.wikipedia.org/w/index.php?#title=%D0%A0%D0%B0%D1%81%D1%81%D1%82%D0%BE%D1%8F%D0%BD%D0%B8%D0%B5_%D0%94%D0%B0%D0%BC%D0%B5%D1%80%D0%B0%D1%83_%E2%80%94_%D0%9B%D0%B5%D0%B2%D0%B5%D0%BD%D1%88%D1%82%D0%B5%D0%B9%D0%BD%D0%B0&oldid=100038662  Creative commons CC-BY-SA )
        def damerau_levenshtein_distance(s1, s2):
            d = {}
            lenstr1 = len(s1)
            lenstr2 = len(s2)
            for i in range(-1,lenstr1+1):
                d[(i,-1)] = i+1
            for j in range(-1,lenstr2+1):
                d[(-1,j)] = j+1
            for i in range(lenstr1):
                for j in range(lenstr2):
                    if s1[i] == s2[j]:
                        cost = 0
                    if (s1[i] in self.not_allowed) or (s2[j] in self.not_allowed): # modification weight
                        cost = 1
                    else:
                        cost = 3
                    d[(i,j)] = min(
                                   d[(i-1,j)] + 1, # deletion
                                   d[(i,j-1)] + 1, # insertion
                                   d[(i-1,j-1)] + cost, # substitution
                                  )
                    if i and j and s1[i] == s2[j-1] and s1[i-1] == s2[j]:
                            d[(i,j)] = min(d[(i,j)], d[i-2,j-2] + 1) # transposition
            return d[lenstr1-1,lenstr2-1]
        
        xDiff=abs(len(l1)-len(l2)) + damerau_levenshtein_distance(l1,l2)
        
        if (len(l1)<3 or len(l2)<3 or abs(len(l1)-len(l2))>30):
             return CONST_MAX_VALUE
        xDiff = xDiff *300 / len(l1)
        if (xDiff>CONST_MAX_VALUE-1000):
            return CONST_MAX_VALUE-100
        return round(xDiff)
    # end of stringDiff
    
    def repair(self, historySeq, itm:int=-1):
        if (itm==0): raise BaseException("Can not repair first item[0] from future!")
        if (itm<0): itm=len(historySeq)-1
        #log.debug("Start repair history[{}] \"{}\"", itm, historySeq.get(itm).name);
        lines=historySeq[itm]
        out=[]
        statRepair, statHistoryUsedDepth, statHistoryWatchDepth, statNotFound = 0,0,0,0
        print(" repair lines:"+str(len(lines)))
        for line in lines:
            print(" repair line>"+repr(line)+"  pos="+str(line['pos'])+" test="+str(line))
            if (line['hasBroken']):
                oldP=line['prePos']
                depth=0
                i=itm-1
                while (oldP!=CONST_UNDEFINED and i>=0):
                    depth+=1
                    statHistoryWatchDepth=max(statHistoryWatchDepth, depth)
                    oldL=historySeq[i][oldP]
                    if (not oldL['hasBroken']):
                        print(" repair line<use old source")
                        out.append(oldL['line'])
                        #log.trace(" [{}][{}] line {}({})> repair from \"{}\" to \"{}\"",
                        #        itm, i, line.pos, oldP, line.line, oldL.line);
                        statRepair+=1
                        statHistoryUsedDepth=max(statHistoryUsedDepth, depth)
                        break
                    #else:
                    #    log.trace(" [{}][{}] line {}({})> go deep", itm, i, line.pos, oldP);
                    oldP=oldL['prePos']
                    i-=1
                if (oldP==CONST_UNDEFINED) :
                    print(" repair line<not found. ", line['line'])
                    out.append(line['line']) #no modify
                    statNotFound+=1
            else:
                print(" repair line<no need")
                out.append(line['line'])
        print(f"Repair complete: repair {statRepair} lines, max history depth = {statHistoryUsedDepth}, max history watch depth = {statHistoryWatchDepth}, not found {statNotFound} lines.")
        print(" repair out lines:"+str(len(out)))
        return out
    # end of repair
    
    def process(self, historySeq, i_to_repair:int=-1):
        hSeq=[]
        for histText in historySeq:
            lines=[]
            i=0
            for l in histText:
                tl={'pos':-1, 'line':l, 'hasBroken':False,
    'prePos':CONST_UNDEFINED, 'preK':-1.0, 'nextPos':CONST_UNDEFINED}
                lines.append(tl)
            hSeq.append(lines)
        print("indexing...")
        idx=self.indexing(hSeq)
        print("pepair n="+str(i_to_repair))
        repair=self.repair(hSeq, i_to_repair)
        print("done")
        return repair

def repair_text_by_history(history_of_text):
    repairer=RepairEngine()
    text=repairer.process(history_of_text, len(history_of_text)-1)
    return text
Code
Diff
  • def kimite(grid):
        rows, cols = len(grid), len(grid[0])
        # Burn method. Less optimal that Djikstra, but simple.
        hotmap=[[None for c in range(cols)] for r in range(rows)]
        hotmap[0][0]=grid[0][0] # first point
        changed=True
        itr=0
        while changed:
            changed=False
            for r in range(rows):
                for c in range(cols):
                    d=[]
                    for y in [r-1,r+1,r]:
                        for x in [c-1,c+1,c]:
                            if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None):
                                d.append(hotmap[y][x])
                    if len(d)>0:
                        k=grid[r][c]+min(d)
                        if (hotmap[r][c]!=None) and (hotmap[r][c]<k):
                            k=hotmap[r][c]
                        if k!=hotmap[r][c]:
                            changed=True
                            hotmap[r][c]=k
            itr+=1
        print('Iteration for search '+str(itr))
        #print(hotmap)
        total_cost = hotmap[-1][-1]
        return total_cost
    
    
    
    • def kimite(grid):
    • rows, cols = len(grid), len(grid[0])
    • position = (0, 0)
    • seen = set()
    • total_cost = 0
    • # Helper function to find the min cost based on current coordinates
    • def get_step_cost(*directions, ):
    • compare = sorted([i for i in directions if i != None], key=lambda x: x[0])
    • multiple = [x for x in [i for i in directions if i != None] if x[0] == compare[0][0]]
    • if len(multiple) > 1:
    • for i in multiple:
    • if i[1] == 'right':
    • return i
    • for i in multiple:
    • if i[1] == 'down':
    • return i
    • else:
    • return compare[0]
    • # Helper function to find polar directions
    • def get_direction():
    • up, down, left, right = None, None, None, None
    • # Check Y
    • if position[0] > 0 and (position[0] - 1, position[1]) not in seen:
    • up = (grid[position[0] - 1][position[1]], 'up')
    • if position[0] + 1 < rows and (position[0] + 1, position[1]) not in seen:
    • down = (grid[position[0] + 1][position[1]], 'down')
    • # Check X
    • if position[1] > 0 and (position[0], position[1] - 1) not in seen:
    • left = (grid[position[0]][position[1] - 1], 'left')
    • if position[1] + 1 < cols and (position[0], position[1] + 1) not in seen:
    • right = (grid[position[0]][position[1] + 1], 'right')
    • return (up, down, left, right)
    • # Traverse the grid to find the minimum cost path
    • while position != (rows - 1, cols - 1):
    • direction = get_direction()
    • cost, move = get_step_cost(*direction)
    • if move == 'up':
    • position = (position[0] - 1, position[1])
    • total_cost += cost
    • seen.add(position)
    • continue
    • if move == 'down':
    • position = (position[0] + 1, position[1])
    • total_cost += cost
    • seen.add(position)
    • continue
    • if move == 'left':
    • position = (position[0], position[1] - 1)
    • total_cost += cost
    • seen.add(position)
    • continue
    • if move == 'right':
    • position = (position[0], position[1] + 1)
    • total_cost += cost
    • seen.add(position)
    • # Burn method. Less optimal that Djikstra, but simple.
    • hotmap=[[None for c in range(cols)] for r in range(rows)]
    • hotmap[0][0]=grid[0][0] # first point
    • changed=True
    • itr=0
    • while changed:
    • changed=False
    • for r in range(rows):
    • for c in range(cols):
    • d=[]
    • for y in [r-1,r+1,r]:
    • for x in [c-1,c+1,c]:
    • if (x>=0)and(y>=0) and (x<cols)and(y<rows) and ((x==c)or(y==r)) and (hotmap[y][x]!=None):
    • d.append(hotmap[y][x])
    • if len(d)>0:
    • k=grid[r][c]+min(d)
    • if (hotmap[r][c]!=None) and (hotmap[r][c]<k):
    • k=hotmap[r][c]
    • if k!=hotmap[r][c]:
    • changed=True
    • hotmap[r][c]=k
    • itr+=1
    • print('Iteration for search '+str(itr))
    • #print(hotmap)
    • total_cost = hotmap[-1][-1]
    • return total_cost
Code
Diff
  • def text():
        d,k,r,s="description","Kata or Kumite","return","symbols"
        return f"""Compress large text - {d} of this {k} in small code.
    This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}.
    How small code can be? Can you wrote more compressing? Let's check it!"""
    
    • #sample code:
    • def text():
    • the_text = """Compress large text - description of this Kata or Kumite in small code.
    • This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.
    • d,k,r,s="description","Kata or Kumite","return","symbols"
    • return f"""Compress large text - {d} of this {k} in small code.
    • This {k} {d} has 462 {s} in 3 line, without title. You should wrote function, {r} this text, but you should use less {s} in you'r code that this text. Simple solution - just {r} \"\"\"source text\"\"\". More smart variant is: do mark often word as special {s} and replace his before {r}.
    • How small code can be? Can you wrote more compressing? Let's check it!"""
    • return the_text # Do make 'def text()' more short
Matrix
Code
Diff
  • class OperationError(Exception):
        def __init__(self, message):            
            super().__init__(message)
    
    def matrix(matrix=list, operation=str, location=tuple, writedata=None):
        def op_write(i,j):
            matrix[i][j] = writedata
            return matrix
        op={"read":lambda i,j: matrix[i][j], "write":op_write}
        try:
            if not operation in op: raise OperationError("That is not a valid operation for this system.")
            return op[operation](location[0],location[1])
        except Exception as e:
            print(e)
            return -1
    • class OperationError(Exception):
    • def __init__(self, message):
    • super().__init__(message)
    • def matrix(matrix=list, operation=str, location=tuple, writedata=None):
    • def op_write(i,j):
    • matrix[i][j] = writedata
    • return matrix
    • op={"read":lambda i,j: matrix[i][j], "write":op_write}
    • try:
    • if operation == "read":
    • return matrix[location[0]][location[1]]
    • elif operation == "write":
    • matrix[location[0]][location[1]] = writedata
    • return matrix
    • else:
    • raise OperationError("That is not a valid operation for this system.")
    • if not operation in op: raise OperationError("That is not a valid operation for this system.")
    • return op[operation](location[0],location[1])
    • except Exception as e:
    • print(e)
    • return -1
    • return -1

Compress large text - description of this Kata or Kumite in small code.

This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return """source text""". More smart variant is: do mark often word as special symbols and replace his before return.

How small code can be? Can you wrote more compressing? Let's check it!

#sample code:
def text():
    the_text = """Compress large text - description of this Kata or Kumite in small code.

This Kata or Kumite description has 462 symbols in 3 line, without title. You should wrote function, return this text, but you should use less symbols in you'r code that this text. Simple solution - just return \"\"\"source text\"\"\". More smart variant is: do mark often word as special symbols and replace his before return.

How small code can be? Can you wrote more compressing? Let's check it!"""
    return the_text # Do make 'def text()' more short
Code
Diff
  • #include <ostream>
    using namespace std;
    
    string calculator(char op, int a, int b) {
        stringstream s;
        switch (op) {
            case '+': {s<<a+b; break;}
            case '-': {s<<a-b; break;}
            case '*': {s<<a*b; break;}
            case '%': {s<<a%b; break;}
            case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/}
            default:  {s<<"Invalid Input!";}
        }
        return s.str();
    }
    
    • #include <ostream>
    • using namespace std;
    • string calculator(char op, int a, int b) {
    • stringstream s;
    • switch (op) {
    • case '+': {s<<a+b; return s.str();}
    • case '-': {s<<a-b; return s.str();}
    • case '*': {s<<a*b; return s.str();}
    • case '/': {b != 0 ? s<<a/b : s<<"Invalid Input!"; return s.str();}
    • case '%': {s<<a%b; return s.str();}
    • default: return "Invalid Input!";
    • case '+': {s<<a+b; break;}
    • case '-': {s<<a-b; break;}
    • case '*': {s<<a*b; break;}
    • case '%': {s<<a%b; break;}
    • case '/': {if (b != 0) {s<<a/b;break;}/*else default: Invalid Input!*/}
    • default: {s<<"Invalid Input!";}
    • }
    • return s.str();
    • }