磁盘文件排序,提醒货仓损坏的解决办法
分类:pc28.am神测网

......

BITMAPINFOHEADER bi;

bi.biSize = sizeof(BITMAPINFOHEADER);
bi.biWidth = bmpScreen.bmWidth;
bi.biHeight = bmpScreen.bmHeight;
bi.biPlanes = 1;
bi.biBitCount = bmpScreen.bmBitsPixel; 
bi.biCompression = BI_RGB;
bi.biSizeImage = 0;
bi.biXPelsPerMeter = 0;
bi.biYPelsPerMeter = 0; 
bi.biClrUsed = 0; 
bi.biClrImportant = 0; 

DWORD dwBmpSize = ((bmpScreen.bmWidth * bi.biBitCount   31) / 32) * 4 * bmpScreen.bmHeight;  cBmpData = new unsigned char[dwBmpSize ]; 

GetDIBits(hdcScreen, hbmScreen, 0, (UINT)bmpScreen.bmHeight, cBmpData, (BITMAPINFO *)&bi, DIB_RGB_COLORS); 

DeleteObject(bmpScreen); 

ReleaseDC(hdcScreen); 

return cBmpData; 
} <---运行到这里时提示堆栈损坏

磁盘文件排序
主题素材陈述,来自《编制程序珠玑》:
输入:三个最多包罗n个不相像的正整数的文书,当中各样数都自愧弗如等于n,且n=10^7。
输出:拿到按从小到大升序排列的蕴藏全部输入的整数的列表。
原则:最多有大致1MB的内部存款和储蓄器空间可用,但磁盘空间丰裕。且供给运营时刻在5秒钟以下,10秒为最好结果。

1、引言

  位图是使用位(bit卡塔尔数组来对数码开展计算,排序和去重,其结构图如下:

    pc28.am神测网 1

  在那之中位图的索引映射须求仓库储存的值,位图索引所在地方的值表示索引对应的值是还是不是业已积存。

那是因为实际GetDIBits的第五个参数需要的其实是一个BITMAPINFO结构,而我们传入的是BITMAPINFOHEADER。

分析:

2、接口

 1 public interface ByteMap {
 2 
 3     /* get the value in the index of byte map
 4      * @param index the index in byte map to get 
 5      * @return 1 or 0 base the value in the index of byte map
 6      */
 7     int getByte(int index);
 8     
 9     /* set the value in the index of byte map
10      * @param index the index in byte map to set
11      * @param flag the value setted in the index of byte map
12      */
13     void setByte(int index, int flag);
14     
15     /* set the value in byte map from start to end
16      * @param start the start index in byte map
17      * @param end the end index in byte map
18      * @flag the value setted in byte map from start to end
19      */
20     void setByte(int start, int end, int flag);
21     
22     /*
23      * get the size of byte map
24      */
25     int getSize();
26     
27     /*
28      * set the size of byte map
29      * @param size to be setted
30      */
31     void setSize(int size);
32     
33     /*
34      * count how many 1 in byte map
35      */
36     int countOne();
37     
38     /*
39      * count how many i in byte map from start to end
40      */
41     int countOne(int start, int end);
42     
43     /*
44      * get the sub map of byte map from start to end
45      */
46     ByteMap subMap(int start, int end);
47     
48     /*
49      * flip the value in byte map 
50      */
51     ByteMap not();
52     
53     /*
54      * or operation with another byte map
55      */
56     ByteMap or(ByteMap byteMap);
57     
58     /*
59      * xor operation with another byte map
60      */
61     ByteMap xor(ByteMap byteMap);
62     
63     /*
64      * and operation with another byte map
65      */
66     ByteMap and(ByteMap byteMap);
67     
68     /*
69      * byte map left shift operation
70      */
71     ByteMap leftShift(int shiftStep);
72     
73     /*
74      * byte map right shift operation
75      */
76     ByteMap rightShift(int shiftStep);
77 }

设若在位图不低于拾伍人时,那是平价的。不过在位图小于十多少人时,它还索要别的的内部存款和储蓄器空间来存款和储蓄三个调色板数据,所以就能够时有发生仓库损坏的不当。

1、归总列排在一条线序。你恐怕会想到把磁盘文件进行归拢列排在一条线序,但难点必要中,你唯有1MB的内部存款和储蓄器空间可用,所以,合併列排在一条线序那么些主意十二分。

 3、实现

  定义静态byte数组常量,用于连忙考验位图上索引对应的值:

 1 private static final byte[] BYTE_VALUE = {
 2         0x0001,
 3         0x0002,
 4         0x0004,
 5         0x0008,
 6         0x0010,
 7         0x0020,
 8         0x0040,
 9         -0x0080
10 };

注脚字段:

1 private int size;
2 private byte b;
3 private byte[] biteMap;

里面size为位图的大小;当位图的size小于等于8时,使用b,不然使用biteMap

构造函数:

 1 public ByteMapImpl() {
 2     this(8);
 3 }
 4     
 5 public ByteMapImpl(int size) {
 6     if(size <= 0) {
 7         throw new IllegalArgumentException("ByteMap size value should be positive");
 8     }
 9     this.size = size;
10     if(size <= 8) {
11         this.b = 0;
12     }else {
13         int len = (size >> 3)   ((size & 7) > 0 ? 1 : 0);
14         this.biteMap = new byte[len];
15     }
16 }

 当中位图的目录从0起初的

位图最根本的多个接口是:

 1 public int getByte(int index) {
 2     if(index < 0 || index >= this.size) {
 3         throw new IllegalArgumentException("index out of bit map");
 4     }
 5     byte unit = (this.size <= 8) ? this.b : this.biteMap[index >> 3];
 6     int result = 0;
 7     result = unit & BYTE_VALUE[index & 7];
 8     return result == 0 ? 0 : 1;
 9 }
10     
11 public void setByte(int index, int flag) {
12     if(index < 0 || index >= this.size) {
13         throw new IllegalArgumentException("index out of bit map");
14     }
15     if(flag != 0 && flag != 1) {
16         throw new IllegalArgumentException("illegal flag argument, must be 1 or 0");
17     }
18         
19     if(flag == this.getByte(index)) {
20         return;
21     }
22     int offSet = index & 7;
23     if(this.size <= 8) {
24         if(flag == 1) {
25             this.b = (byte) (this.b | BYTE_VALUE[offSet]);
26         }else {
27             this.b = (byte) (this.b & (~BYTE_VALUE[offSet]));
28         }
29             
30     }else {
31         int unitIndex = index >> 3;
32         byte unit = this.biteMap[unitIndex];
33         if(flag == 1) {
34             this.biteMap[unitIndex] = (byte) (unit | BYTE_VALUE[offSet]);
35         }else {
36             this.biteMap[unitIndex] = (byte) (unit & (~BYTE_VALUE[offSet]));
37         }
38     }
39 }

因而位操作与,或以致运动完结以上多少个接口

pc28.am神测网,剩下的接口基于上述实现的:

  1 public void setByte(int start, int end, int flag) {
  2     for(int i = start ; i <= end;   i) {
  3         this.setByte(i, flag);
  4     }
  5 }
  6     
  7 public int getSize() {
  8     return size;
  9 }
 10 
 11 public void setSize(int size) {
 12     this.size = size;
 13 }
 14     
 15 public int countOne() {
 16     return this.countOne(0, this.size - 1);
 17 }
 18     
 19 public int countOne(int start, int end) {
 20     int count = 0;
 21     for(int i = start; i <= end; i  ) {
 22         count  = this.getByte(i);
 23     }
 24     return count;
 25 }
 26     
 27 public ByteMap subMap(int start, int end) {
 28     ByteMap byteMap = new ByteMapImpl(end - start   1);
 29     for(int i = start; i <= end;   i) {
 30         if(this.getByte(i) != 0) {
 31             byteMap.setByte(i - start, 1);
 32         }
 33     }
 34     return byteMap;
 35 }
 36     
 37 public  ByteMap not() {
 38     ByteMap byteMap = new ByteMapImpl(this.size);
 39     for(int i = 0; i < this.size;   i) {
 40         int flag = (this.getByte(i) == 0) ? 1 : 0;
 41         byteMap.setByte(i, flag);
 42     }
 43     return byteMap;
 44 }
 45     
 46 public ByteMap or(ByteMap byteMap) {
 47     int s1 = this.size;
 48     int s2 = byteMap.getSize();
 49     int orSize = s1 > s2 ? s1 : s2;
 50     ByteMap orMap = new ByteMapImpl(orSize);
 51     int i = 0;
 52     while(i < s1 && i < s2) {
 53         if(this.getByte(i) != 0 || byteMap.getByte(i) != 0) {
 54             orMap.setByte(i, 1);
 55         }
 56           i;
 57     }
 58     while(i < s1) {
 59         if(this.getByte(i) != 0) {
 60             orMap.setByte(i, 1);
 61         }
 62           i;
 63     }
 64     while(i < s2) {
 65         if(byteMap.getByte(i) != 0) {
 66             orMap.setByte(i, 1);
 67         }
 68           i;
 69     }
 70     return orMap;
 71 }
 72     
 73 public ByteMap xor(ByteMap byteMap) {
 74     int s1 = this.size;
 75     int s2 = byteMap.getSize();
 76     int xorSize = s1 > s2 ? s1 : s2;
 77     ByteMap xorMap = new ByteMapImpl(xorSize);
 78     int i = 0;
 79     while(i < s1 && i < s2) {
 80         if(this.getByte(i) !=  byteMap.getByte(i)) {
 81             xorMap.setByte(i, 1);
 82         }
 83           i;
 84     }
 85     while(i < s1) {
 86         if(this.getByte(i) != 0) {
 87             xorMap.setByte(i, 1);
 88         }
 89           i;
 90     }
 91     while(i < s2) {
 92         if(byteMap.getByte(i) != 0) {
 93             xorMap.setByte(i, 1);
 94         }
 95           i;
 96     }
 97     return xorMap;
 98 }
 99     
100 public ByteMap and(ByteMap byteMap) {
101     int s1 = this.size;
102     int s2 = byteMap.getSize();
103     int orSize = s1 > s2 ? s1 : s2;
104     ByteMap andMap = new ByteMapImpl(orSize);
105     int i = 0;
106     while(i < s1 && i < s2) {
107         if(this.getByte(i) != 0 && byteMap.getByte(i) != 0) {
108             andMap.setByte(i, 1);
109         }
110           i;
111     }
112     return andMap;
113 }
114     
115 public ByteMap leftShift(int shiftStep) {
116     ByteMap shiftMap = new ByteMapImpl(this.size);
117     if(this.countOne() > 0 && shiftStep < this.size) {
118         if(shiftStep < 0) {
119             return this.rightShift((0 - shiftStep));
120         }else {
121             for(int i = shiftStep; i < this.size;   i) {
122                 if(this.getByte(i) != 0) {
123                     shiftMap.setByte(i - shiftStep, 1);
124                 }
125             }
126         }
127     }
128     return shiftMap;
129 }
130     
131 public ByteMap rightShift(int shiftStep) {
132     ByteMap shiftMap = new ByteMapImpl(this.size);
133     if(this.countOne() > 0 && shiftStep < this.size) {
134         if(shiftStep < 0) {
135             return this.leftShift((0 - shiftStep));
136         }else {
137             for(int i = this.size - shiftStep - 1; i >= 0; --i) {
138                 if(this.getByte(i) != 0) {
139                     shiftMap.setByte(i   shiftStep, 1);
140                 }
141             }
142         }
143     }
144     return shiftMap;
145 }

准确的做法是这么的

2、位图方案。比如正如《编制程序珠玑》生机勃勃书上所述,用一个贰九人长的位字符串来代表贰个全数因素都自愧弗如20的简要的非负整数会集,边框用如下字符串来代表集结{1,2,3,5,8,13}:
0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0
上述集结中各数对应的职位则置1,未有对应的数的地点则置0。

4、测试

为了便利测量试验,重写Object类型的toString方法:

 1 @Override
 2 public String toString() {
 3     StringBuilder sb = new StringBuilder();
 4     if(this.size <= 8) {
 5         for(int i = 0; i < 8;   i) {
 6             if(i < 7) {
 7                 try {
 8                     sb.append(this.getByte(i)   ",");
 9                 } catch (IllegalArgumentException e) {
10                     sb.append("0,");
11                 }
12             }else {
13                 try {
14                     sb.append(this.getByte(i));
15                 } catch (IllegalArgumentException e) {
16                     sb.append("0");
17                 }
18             }
19         }
20     }else {
21         for(int i = 0; i < this.biteMap.length*8;   i) {
22             if((i&7) == 0) {
23                 sb.append("n"   (i>>3)   ":");
24             }else {
25                 sb.append(",");
26             }
27             try {
28                 sb.append(this.getByte(i));
29             } catch (IllegalArgumentException e) {
30                 sb.append("0");
31             }
32         }
33     }
34     return sb.toString();
35 }

测试main函数:

 1 public static void main(String[] args) {
 2     ByteMap byteMap1 = new ByteMapImpl(60);
 3     byteMap1.setByte(3, 5, 1);
 4     System.out.println("byteMap1:"   byteMap1.toString());
 5     System.out.println("byteMap1 count 1:"   byteMap1.countOne());
 6     ByteMap byteMap2 = byteMap1.subMap(1, 11);
 7     System.out.println("byteMap1 sub map:"   byteMap2);
 8     System.out.println("byteMap1 sub map count 1:"   byteMap2.countOne());
 9     System.out.println("or map:"   byteMap1.or(byteMap2));
10     System.out.println("and map:"   byteMap1.and(byteMap2));
11     System.out.println("xor map:"   byteMap1.xor(byteMap2));
12     System.out.println("byteMap1 left shift 3 bit:"   byteMap1.leftShift(3));
13     System.out.println("byteMap1 right shift 3 bite:"   byteMap1.rightShift(3));
14 }

结果:

byteMap1:
0:0,0,0,1,1,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 count 1:3
byteMap1 sub map:
0:0,0,1,1,1,0,0,0
1:0,0,0,0,0,0,0,0
byteMap1 sub map count one:3
or map:
0:0,0,1,1,1,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
and map:
0:0,0,0,1,1,0,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
xor map:
0:0,0,1,0,0,1,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 left shift 3 bit:
0:1,1,1,0,0,0,0,0
1:0,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
byteMap1 right shift 3 bite:
0:0,0,0,0,0,0,1,1
1:1,0,0,0,0,0,0,0
2:0,0,0,0,0,0,0,0
3:0,0,0,0,0,0,0,0
4:0,0,0,0,0,0,0,0
5:0,0,0,0,0,0,0,0
6:0,0,0,0,0,0,0,0
7:0,0,0,0,0,0,0,0
struct { BITMAPINFO info; RGBQUAD moreColors[255]; } fbi;
BITMAPINFOHEADER &bi = fbi.info.bmiHeader;
bi.biSize = sizeof(BITMAPINFOHEADER);
...
GetDIBits(..., &fbi.info, ...);

参照他事他说加以侦察《编制程序珠玑》豆蔻梢头书上的位图方案,针对10^7个数据量的磁盘文件排序难点,能够如此思量,由于各样7位十进制整数表示贰个低于1000万的大背头。能够接收叁个持有1000万个位的字符串来表示这么些文件,在那之中,当且仅当整数i在文书中留存时,第i位为1。选拔那个位图的方案是因为大家直面的这么些主题素材的特殊性:1、输入数据节制在对立不大的范围内,2、数据未有再一次,3、此中的每条记下都以单纯的正整数,未有其余别的与之提到的数目。
于是,此难点用位图的方案分为以下三步实行减轻:
率先步,将具有的位都置为0,进而将集聚起来化为空。
其次步,通过读入文件中的每一个整数来创设集聚,将种种对应的位都置为1。
其三步,考验每壹位,假使该位为1,就输出对应的卡尺头。
透过上述三步后,爆发有序的出口文件。令n为位图向量中的位数(本例中为10000000卡塔 尔(英语:State of Qatar),程序能够用伪代码表示如下:

5、总结

  使用位图相符稠密数据,不然会招致大气空中的浪费

 

//第一步,将所有的位都初始化为0   
for i ={0,....n}  
    bit[i]=0;  

//第二步,通过读入文件中的每个整数来建立集合,将每个对应的位都置为1。   
for each i in the input file  
    bit[i]=1;  

//第三步,检验每一位,如果该位为1,就输出对应的整数。   
for i={0...n}  
    if bit[i]==1  
      write i on the output file  

6、思考

  (1)为什么 BYTE_VALUE[7] = -0x0080;

  (2卡塔尔国达成的位图是非线程安全的,怎么样矫正不仅可以保障线程安全,又不会大幅减弱品质;

 

可是相当慢,大家就将开采到,用此位图方法,严俊说来依旧不太行,空间消耗10^7/8依旧超过1M(1M=1024*1024空间,小于10^7/8)。
既然如若用位图方案以来,大家必要约1.25MB(若每条记下是8位的正整数的话,则10000000/(1024*1024*8) ~= 1.2M卡塔尔国的空间,而目前独有1MB的可用存款和储蓄空间,那么到底该作何地理啊?能够再三运用位图实行排序。

源码参谋:

3、多路归总。把那一个文件分为若干大大小小的几块,然后分别对每一块举办排序,最后造成全套经过的排序。k趟算法能够在kn的时间支付内和n/k的空中开拓内成功对最多n个小于n的无重复正整数的排序。比如可分为2块(k=2,1趟左右占用的内部存储器只有1.25/2=0.625M卡塔尔国,1~4999999,和5000000~9999999先遍历生龙活虎趟,管理1~4999999的数据块(用5000000/8=625000个字的蕴藏空间来排序0~4999999以内的整数卡塔 尔(英语:State of Qatar),然后再第二趟,对5000001~1000000那生机勃勃数目块拍卖。

针对这一个要分两趟给磁盘文件排序的切实难题编写完整代码,如下:

pc28.am神测网 2pc28.am神测网 3View Code

//copyright@ yansha     
//July、2010.05.30。     
//位图方案解决10^7个数据量的文件的排序问题     
//如果有重复的数据,那么只能显示其中一个 其他的将被忽略     
#include <iostream>     
#include <bitset>     
#include <assert.h>     
#include <time.h>     
using namespace std;    

const int max_each_scan = 5000000;    

int main()    
{    
    clock_t begin = clock();    
    bitset<max_each_scan> bit_map;    
    bit_map.reset();    

    // open the file with the unsorted data     
    FILE *fp_unsort_file = fopen("data.txt", "r");    
    assert(fp_unsort_file);    
    int num;    

    // the first time scan to sort the data between 0 - 4999999     
    while (fscanf(fp_unsort_file, "%d ", &num) != EOF)    
    {    
        if (num < max_each_scan)    
            bit_map.set(num, 1);    
    }    

    FILE *fp_sort_file = fopen("sort.txt", "w");    
    assert(fp_sort_file);    
    int i;    

    // write the sorted data into file     
    for (i = 0; i < max_each_scan; i  )    
    {    
        if (bit_map[i] == 1)    
            fprintf(fp_sort_file, "%d ", i);    
    }    

    // the second time scan to sort the data between 5000000 - 9999999     
    int result = fseek(fp_unsort_file, 0, SEEK_SET);    
    if (result)    
        cout << "fseek failed!" << endl;    
    else    
    {    
        bit_map.reset();    
        while (fscanf(fp_unsort_file, "%d ", &num) != EOF)    
        {    
            if (num >= max_each_scan && num < 10000000)    
            {    
                num -= max_each_scan;    
                bit_map.set(num, 1);    
            }    
        }    
        for (i = 0; i < max_each_scan; i  )    
        {    
            if (bit_map[i] == 1)    
                fprintf(fp_sort_file, "%d ", i   max_each_scan);    
        }    
    }    

    clock_t end = clock();    
    cout<<"用位图的方法,耗时:"<<endl;    
    cout << (end - begin) / CLK_TCK << "s" << endl;    
    fclose(fp_sort_file);    
    fclose(fp_unsort_file);    
    return 0;    
}    

上述的位图方案,共索要扫描输入数据四回,具体执行步骤如下:
第三回,只管理1—4999999之内的多寡,那么些数都以自轻自贱5000000的,对那几个数实行位图排序,只要求约5000000/8=625000Byte,相当于0.625M,排序后输出。
第一次,扫描输入文件时,只管理4999999-10000000的数据项,也只须要0.625M(能够使用第叁回拍卖申请的内部存储器卡塔尔。因而,总共也只供给0.625M。

磁盘文件排序的C落成

1、内排序
是因为须求的可用内存为1MB,那么每趟可以在内部存款和储蓄器中对250K的数码开展排序,然后将平稳的数写入硬盘。
那正是说10M的数据须要循环45遍,最后发生四十多个不改变的文本。

2、多路归总列排在一条线序
(1卡塔 尔(阿拉伯语:قطر‎将各种文件最开首的数读入(由于有序,所认为该公文最小数卡塔 尔(英语:State of Qatar),贮存在三个尺寸为40的first_data数组中;
(2)选择first_data数组中幽微的数min_data,及其对应的文件索引index;
(3)将first_data数组中渺小的数写入文件result,然后更新数组first_data(依据index读取该文件下多少个数替代min_data);
(4卡塔 尔(英语:State of Qatar)判定是或不是享有数据都读取完成,不然再次回到(2卡塔 尔(阿拉伯语:قطر‎。

完整代码如下:

pc28.am神测网 4pc28.am神测网 5View Code

//copyright@ yansha   
//July、updated,2011.05.28。   
#include <iostream>   
#include <string>   
#include <algorithm>   
#include <time.h>   
using namespace std;  

int sort_num = 10000000;  
int memory_size = 250000;  //每次只对250k个小数据量进行排序   

int read_data(FILE *fp, int *space)  
{  
    int index = 0;  
    while (index < memory_size && fscanf(fp, "%d ", &space[index]) != EOF)  
        index  ;  

    return index;  
}  

void write_data(FILE *fp, int *space, int num)  
{  
    int index = 0;  
    while (index < num)  
    {  
        fprintf(fp, "%d ", space[index]);  
        index  ;  
    }  
}  

// check the file pointer whether valid or not.   
void check_fp(FILE *fp)  
{  
    if (fp == NULL)  
    {  
        cout << "The file pointer is invalid!" << endl;  
        exit(1);  
    }  
}  

int compare(const void *first_num, const void *second_num)  
{  
    return *(int *)first_num - *(int *)second_num;  
}  

string new_file_name(int n)  
{  
    char file_name[20];  
    sprintf(file_name, "data%d.txt", n);  
    return file_name;  
}  

int memory_sort()  
{  
    // open the target file.   
    FILE *fp_in_file = fopen("data.txt", "r");  
    check_fp(fp_in_file);  

    int counter = 0;  
    while (true)  
    {  
        // allocate space to store data read from file.   
        int *space = new int[memory_size];  
        int num = read_data(fp_in_file, space);  

        // the memory sort have finished if not numbers any more.   
        if (num == 0)  
            break;  

        // quick sort.   
        qsort(space, num, sizeof(int), compare);  

        // create a new auxiliary file name.   
        string file_name = new_file_name(  counter);  
        FILE *fp_aux_file = fopen(file_name.c_str(), "w");  
        check_fp(fp_aux_file);  

        // write the orderly numbers into auxiliary file.   
        write_data(fp_aux_file, space, num);  

        fclose(fp_aux_file);  
        delete []space;  
    }  
    fclose(fp_in_file);  

    // return the number of auxiliary files.   
    return counter;  
}  

void merge_sort(int file_num)  
{  
    if (file_num <= 0)  
        return;  

    // create a new file to store result.   
    FILE *fp_out_file = fopen("result.txt", "w");  
    check_fp(fp_out_file);  

    // allocate a array to store the file pointer.   
    FILE **fp_array = new FILE *[file_num];  

    int i;  
    for (i = 0; i < file_num; i  )  
    {  
        string file_name = new_file_name(i   1);  
        fp_array[i] = fopen(file_name.c_str(), "r");  
        check_fp(fp_array[i]);  
    }  

    int *first_data = new int[file_num]; //new出个大小为0.1亿/250k数组,由指针first_data指示数组首地址   

    bool *finish = new bool[file_num];  
    memset(finish, false, sizeof(bool) * file_num);  

    // read the first number of every auxiliary file.   
    for (i = 0; i < file_num; i  )  
        fscanf(fp_array[i], "%d ", &first_data[i]);  

    while (true)  
    {  
        int index = 0;  
        while (index < file_num && finish[index])  
            index  ;  

        // the finish condition of the merge sort.   
        //要保证所有文件都读完,必须使得finish[0]...finish[40]都为真   
        if (index >= file_num)  
            break;  

        int min_data = first_data[index];  

        // choose the relative minimum in the array of first_data.   
        for (i = index   1; i < file_num; i  )  
        {  
            if (min_data > first_data[i] && !finish[i])//比min_data更小的数据first_data[i]   
            {  
                min_data = first_data[i];    //则置min_data<-first_data[i]   
                index = i;                   //把下标i 赋给index。   
            }  
        }  

        // write the orderly result to file.   
        fprintf(fp_out_file, "%d ", min_data);  

        if (fscanf(fp_array[index], "%d ", &first_data[index]) == EOF)  
            finish[index] = true;  
    }  

    fclose(fp_out_file);  

    delete []finish;  
    delete []first_data;  

    for (i = 0; i < file_num; i  )  
        fclose(fp_array[i]);  

    delete [] fp_array;  
}  

int main()  
{  
    clock_t start_memory_sort = clock();  
    int aux_file_num = memory_sort();  
    clock_t end_memory_sort = clock();  
    cout << "The time needs in memory sort: " << end_memory_sort - start_memory_sort << endl;  

    clock_t start_merge_sort = clock();  
    merge_sort(aux_file_num);  
    clock_t end_merge_sort = clock();  
    cout << "The time needs in merge sort: " << end_merge_sort - start_merge_sort << endl;  
    system("pause");  
    return 0;  
}  

测验数据:生成1000万个不另行的正整数

pc28.am神测网 6pc28.am神测网 7View Code

//生成随机的不重复的测试数据   
#include <iostream>   
#include <time.h>   
#include <assert.h>   
using namespace std;  

//产生[l,u]区间的随机数   
int randint(int l, int u)  
{  
 return l (RAND_MAX*rand() rand())%(u-l 1);  
}  

 //1000W的int,大约4M的数据,如果放在mian内,在我的机子上好像是栈溢出了,放在全局空间就没问题   
const int size = 10000000;  
int num[size];  

int main()  
{  
    int i, j;  
    FILE *fp = fopen("data.txt", "w");  
    assert(fp);  
    for (i = 0; i < size; i  )  
        num[i] = i 1;  
    srand((unsigned)time(NULL));  
    for (i = 0; i < size; i  )  
    {  
        j = randint(i, size-1);  
        int t = num[i]; num[i] = num[j]; num[j] = t;  
        //swap(num[i], num[j]);   
    }  
    for (i = 0; i < size; i  )  
        fprintf(fp, "%d ", num[i]);  
    fclose(fp);  
    return 0;  
}  

 

 

本文由pc28.am发布于pc28.am神测网,转载请注明出处:磁盘文件排序,提醒货仓损坏的解决办法

上一篇:桥接形式,hadoop情状搭建之有关NAT格局静态IP的装 下一篇:没有了
猜你喜欢
热门排行
精彩图文