Sequence is used to relate two DUs, one of which happens after the other, usually with at least one of them being non-linguistic.
def LF_Seq_NL_L_case1(row):
    """Case 1 (original note: might be tagging a lot of results).

    Returns 1 when ``has_resources`` accepts the source text, ``is_head``
    accepts the source turn id / span end (looked up in the global
    ``cands``), and ``contains_resource_name`` accepts the target text;
    otherwise returns 0.
    """
    if not has_resources(row.source_text):
        return 0
    if is_head(cands, row.source_turn_id, row.source_span_end) and contains_resource_name(row.target_text):
        return 1
    return 0


def LF_Seq_NL_L_case2(row):
    """Case 2.

    Returns 1 when the source text matches one of ``XbuiltaA``,
    ``boughtAcard`` or ``willMoveTheRobber`` and the target emitter equals
    the first space-delimited token of the source text; otherwise 0.
    """
    source = row.source_text
    triggered = XbuiltaA(source) or boughtAcard(source) or willMoveTheRobber(source)
    if not triggered:
        return 0
    return 1 if row.target_emitter == row.source_text.split(' ')[0] else 0


def LF_Seq_NL_L_case3(row):
    """Case 3.

    Returns 1 when the source text matches ``traded`` but not
    ``stoleAResourceFrom``, its first space-delimited token equals the
    target emitter, the target dialogue act is 'Offer', and the pair is at
    distance 1; otherwise 0.
    """
    if not traded(row.source_text):
        return 0
    if stoleAResourceFrom(row.source_text):
        return 0
    first_token = row.source_text.split(' ')[0]
    if first_token == row.target_emitter and row.target_dialogue_act == 'Offer' and row.distance == 1:
        return 1
    return 0


def LF_Seq_NL_L_case4(row):
    """Case 4 -- X sat down, then the next move is an EDU by X.

    Returns 1 when the source text matches ``satDownAtSeat``, the target
    emitter equals the first space-delimited token of the source text, and
    the pair is at distance 1; otherwise 0.
    """
    if not satDownAtSeat(row.source_text):
        return 0
    if row.target_emitter == row.source_text.split(' ')[0] and row.distance == 1:
        return 1
    return 0
# 1 -- get dialogues; `finals` collects (candidate index, label) tuples
finals = []
dialogues = cands.dialogue_num.drop_duplicates()

for dialogue in tqdm(dialogues):
    # Ids already used in a sequence relation -- we assume that each node
    # can only take part in one sequence relation.
    memo = []

    # 2 -- get segment list
    seg_list = get_seg_list(cands[cands.dialogue_num == dialogue])

    # 3 -- create seg pairs list: each segment that has a successor is
    # paired, together with up to 19 segments before it (nearest first),
    # with that successor.
    seg_pairs = []
    for pos, _ in enumerate(seg_list):
        try:
            successor = seg_list[pos + 1]
        except IndexError:
            # the last segment has no successor, so it sources no pairs
            continue
        for back in range(pos, max(pos - 20, -1), -1):
            seg_pairs.append((seg_list[back], successor))

    # 4 -- for each pair, pull row and apply rules
    for src_id, tgt_id in seg_pairs:
        forward = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if forward.empty:
            continue
        r_index = forward.index[0]
        row = forward.iloc[0]

        # Any backwards candidate (target -> source) is recorded with -1.
        # NOTE(review): the original comment said these should "also be 0",
        # but the code has always appended -1 -- confirm which is intended.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if backwards.shape[0] > 0:
            finals.append((backwards.index[0], -1))

        # Only NonplayerSegment --> Segment pairs can be labelled 1.
        link = 0
        if row.source_type == 'NonplayerSegment' and row.target_type == 'Segment':
            # Try each of the four cases; a pair is only linked when neither
            # endpoint has been used in a sequence relation already.
            fires = (LF_Seq_NL_L_case1(row) or LF_Seq_NL_L_case2(row)
                     or LF_Seq_NL_L_case3(row) or LF_Seq_NL_L_case4(row))
            if fires and row.source_id not in memo and row.target_id not in memo:
                link = 1
                memo.extend([row.source_id, row.target_id])

        finals.append((r_index, link))
# NB: the false positives that come from these rules are in part due to the
# rule not being able to see whether a sequence relation has already been
# added between an EEU node and an intervening EDU node, which wouldn't show
# up in the NL-NL case.

# TODO!! Figure out why we are missing the first EEUs in the candidates.

def LF_Seq_NL_NL_case1(row):
    """'has resources' to next EEU.

    Returns 1 when source and target are in different turns, the source
    text matches ``has_resources``, and ``is_head`` accepts the source turn
    id / span end (looked up in the global ``cands``); otherwise 0.
    """
    different_turn = row.source_turn_id != row.target_turn_id
    if different_turn and has_resources(row.source_text):
        if is_head(cands, row.source_turn_id, row.source_span_end):
            return 1
    return 0


def LF_Seq_NL_NL_case2(row):
    """Target = 'X ended their turn', linked to the most recent 'X' EEU.

    Returns 1 when the target text matches ``endedtheirturn``, the source
    text matches neither ``has_resources`` nor ``stoleAResourceFrom``, both
    texts share the same first space-delimited token, and the distance is
    at most 3; otherwise 0.
    """
    if not endedtheirturn(row.target_text):
        return 0
    if has_resources(row.source_text) or stoleAResourceFrom(row.source_text):
        return 0
    same_actor = row.target_text.split(' ')[0] == row.source_text.split(' ')[0]
    if same_actor and row.distance <= 3:
        return 1
    return 0


def LF_Seq_NL_NL_case3(row):
    """For every mid-game set of moves where each turn == 1 segment and distance == 1.

    Returns 1 when the distance is 1 and the source text matches none of
    ``has_resources``, ``youcantmaketrade`` and ``stoleAResourceFrom``;
    otherwise 0.
    """
    if row.distance == 1:
        blocked = (has_resources(row.source_text)
                   or youcantmaketrade(row.source_text)
                   or stoleAResourceFrom(row.source_text))
        if not blocked:
            return 1
    return 0


def LF_Seq_NL_NL_case4(row):
    """For beginning-of-game moves -- target is "sat down at", "joined" or "game started".

    Returns 1 when the target text matches ``satDownAtSeat``,
    ``joinedTheGame`` or ``gamestarted``; otherwise 0.
    """
    target = row.target_text
    if satDownAtSeat(target) or joinedTheGame(target) or gamestarted(target):
        return 1
    return 0
# 1 -- get dialogues; `finals` collects (candidate index, label) tuples
finals = []
dialogues = cands.dialogue_num.drop_duplicates()
# (to debug a single dialogue, override with e.g. dialogues = [98])

for dialogue in tqdm(dialogues):
    # Keep track of DUs involved in a sequence relation as source or as
    # target -- one DU cannot be source (or target) of more than one.
    memo_source = []
    memo_target = []

    # 2 -- get segment list
    seg_list = get_seg_list(cands[cands.dialogue_num == dialogue])

    # 3 -- create seg pairs list: each segment that has a successor is
    # paired, together with up to 19 segments before it (nearest first),
    # with that successor.
    seg_pairs = []
    for pos, _ in enumerate(seg_list):
        try:
            successor = seg_list[pos + 1]
        except IndexError:
            # the last segment has no successor, so it sources no pairs
            continue
        for back in range(pos, max(pos - 20, -1), -1):
            seg_pairs.append((seg_list[back], successor))

    # 4 -- for each pair, pull row and apply rules
    for src_id, tgt_id in seg_pairs:
        forward = cands[(cands.source_id == src_id) & (cands.target_id == tgt_id)]
        if forward.empty:
            continue
        r_index = forward.index[0]
        row = forward.iloc[0]

        # Any backwards candidate (target -> source) is recorded with -1.
        # NOTE(review): the original comment said these should "also be 0",
        # but the code has always appended -1 -- confirm which is intended.
        backwards = cands[(cands.source_id == tgt_id) & (cands.target_id == src_id)]
        if backwards.shape[0] > 0:
            finals.append((backwards.index[0], -1))

        # Only NonplayerSegment --> NonplayerSegment pairs can be labelled 1.
        link = 0
        if row.source_type == 'NonplayerSegment' and row.target_type == 'NonplayerSegment':
            fires = (LF_Seq_NL_NL_case1(row) or LF_Seq_NL_NL_case2(row)
                     or LF_Seq_NL_NL_case3(row) or LF_Seq_NL_NL_case4(row))
            # Link only if this target has not been a target before and
            # this source has not been a source before.
            if fires and row.target_id not in memo_target and row.source_id not in memo_source:
                link = 1
                memo_target.append(row.target_id)
                memo_source.append(row.source_id)

        finals.append((r_index, link))