前言

最近在学堆中_IO_FILE的利用,也就是各种how2heap了

发现对_IO_FILE的具体规则及代码运用其实还不是很熟悉

恰好发现了rap-cp和桑榆两个大佬的博客文章

认真学习了一番

并记录自己的学习收获

glibc使用2.31,因为自己的ubuntu默认libc是2.31的,比较方便

版本差异导致有些代码并不能完全对应,不过问题不大


很多函数的原型不太好找是因为

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Each libc_hidden_* helper below is defined with an empty replacement
   list, so in this configuration every use of them expands to nothing.  */
# define libc_hidden_proto(name, attrs...)
# define libc_hidden_tls_proto(name, attrs...)
# define libc_hidden_def(name)
# define libc_hidden_weak(name)
# define libc_hidden_ver(local, name)
# define libc_hidden_data_def(name)
# define libc_hidden_tls_def(name)
# define libc_hidden_data_weak(name)
# define libc_hidden_data_ver(local, name)

/* Each entry names an _IO_new_* implementation, the plain _IO_* name it is
   paired with, and the GLIBC_2_1 version tag.
   NOTE(review): versioned_symbol is defined elsewhere; presumably it exports
   the _IO_new_* function under the plain name, which would be why the plain
   names have no function body of their own -- confirm against the macro.  */
versioned_symbol (libc, _IO_new_do_write, _IO_do_write, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_attach, _IO_file_attach, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_close_it, _IO_file_close_it, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_finish, _IO_file_finish, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_fopen, _IO_file_fopen, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_init, _IO_file_init, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_setbuf, _IO_file_setbuf, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_sync, _IO_file_sync, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_overflow, _IO_file_overflow, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_seekoff, _IO_file_seekoff, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_underflow, _IO_file_underflow, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_write, _IO_file_write, GLIBC_2_1);
versioned_symbol (libc, _IO_new_file_xsputn, _IO_file_xsputn, GLIBC_2_1);

等一系列原型隐藏符号映射宏

fopen

fopen实际上是 _IO_new_fopen函数,该函数在/libio/iofopen.c文件中

1
2
3
4
5
/* Thin wrapper: forwards FILENAME and MODE to __fopen_internal with the
   third argument (is32) fixed to 1 and returns its result unchanged.  */
FILE *
_IO_new_fopen (const char *filename, const char *mode)
{
return __fopen_internal (filename, mode, 1);
}

可以看到_IO_new_fopen仅仅是调用了__fopen_internal

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Allocate a new stream and open FILENAME on it using MODE.
   IS32 is passed through unchanged to _IO_file_fopen.
   Returns the result of __fopen_maybe_mmap on success, or NULL when either
   the allocation or the open fails.  */
FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
/* A single heap block holds the stream, its lock (only when _IO_MTSAFE_IO
   is defined) and its wide-character data.  */
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

if (new_f == NULL)
return NULL;
#ifdef _IO_MTSAFE_IO
/* Point the stream at the lock embedded in the same allocation.  */
new_f->fp.file._lock = &new_f->lock;
#endif
/* Reset the FILE fields; the wide data gets _IO_wfile_jumps as its wide
   vtable.  */
_IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
/* The stream's own vtable is _IO_file_jumps.  */
_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
_IO_new_file_init_internal (&new_f->fp);
if (_IO_file_fopen ((FILE *) new_f, filename, mode, is32) != NULL)
return __fopen_maybe_mmap (&new_f->fp.file);

/* The open failed: unlink the stream again and release the block.  */
_IO_un_link (&new_f->fp);
free (new_f);
return NULL;
}

__fopen_internal其实就差不多已经包含了整个流程了

整个__fopen_internal函数包含四个部分:

  1. malloc分配内存空间。
  2. _IO_no_init 对file结构体进行null初始化。
  3. _IO_file_init将结构体链接进_IO_list_all链表。
  4. _IO_file_fopen执行系统调用打开文件。

0x1

开头声明了一个结构体并实例化了一个对象

1
2
3
4
5
6
7
8
struct locked_FILE
{
struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
_IO_lock_t lock;
#endif
struct _IO_wide_data wd;
} *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

在64位系统中大小为0x230

该结构体包含三个结构体:_IO_FILE_plus、_IO_lock_t、_IO_wide_data,其中_IO_FILE_plus为使用的IO FILE的结构体。

0x2

_IO_no_init 对file结构体进行null初始化

在分配完空间后,接着就调用_IO_no_init函数去null初始化结构体,跟进去该函数,函数在/libio/genops.c中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Initialise FP as an unopened stream.
   FLAGS is forwarded to _IO_old_init; ORIENTATION is stored in fp->_mode.
   When ORIENTATION >= 0, WD becomes the stream's wide data, all of its
   buffer pointers are cleared and JMP is installed as its wide vtable.
   Otherwise _wide_data is set to an invalid pointer (-1).  */
void
_IO_no_init (FILE *fp, int flags, int orientation,
struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
{
/* Byte-oriented fields first.  */
_IO_old_init (fp, flags);
fp->_mode = orientation;
if (orientation >= 0)
{
fp->_wide_data = wd;
fp->_wide_data->_IO_buf_base = NULL;
fp->_wide_data->_IO_buf_end = NULL;
fp->_wide_data->_IO_read_base = NULL;
fp->_wide_data->_IO_read_ptr = NULL;
fp->_wide_data->_IO_read_end = NULL;
fp->_wide_data->_IO_write_base = NULL;
fp->_wide_data->_IO_write_ptr = NULL;
fp->_wide_data->_IO_write_end = NULL;
fp->_wide_data->_IO_save_base = NULL;
fp->_wide_data->_IO_backup_base = NULL;
fp->_wide_data->_IO_save_end = NULL;

fp->_wide_data->_wide_vtable = jmp;
}
else
/* Cause predictable crash when a wide function is called on a byte
stream. */
fp->_wide_data = (struct _IO_wide_data *) -1L;
fp->_freeres_list = NULL;
}

_IO_old_init

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* Reset the byte-oriented part of FP.
   _flags becomes _IO_MAGIC combined with FLAGS, _flags2 is cleared (plus
   _IO_FLAGS2_NEED_LOCK when stdio_needs_locking is set), and every buffer,
   backup and marker pointer is set to NULL.  */
void
_IO_old_init (FILE *fp, int flags)
{
fp->_flags = _IO_MAGIC|flags;
fp->_flags2 = 0;
if (stdio_needs_locking)
fp->_flags2 |= _IO_FLAGS2_NEED_LOCK;
fp->_IO_buf_base = NULL;
fp->_IO_buf_end = NULL;
fp->_IO_read_base = NULL;
fp->_IO_read_ptr = NULL;
fp->_IO_read_end = NULL;
fp->_IO_write_base = NULL;
fp->_IO_write_ptr = NULL;
fp->_IO_write_end = NULL;
fp->_chain = NULL; /* Not necessary. */

fp->_IO_save_base = NULL;
fp->_IO_backup_base = NULL;
fp->_IO_save_end = NULL;
fp->_markers = NULL;
fp->_cur_column = 0;
#if _IO_JUMPS_OFFSET
fp->_vtable_offset = 0;
#endif
#ifdef _IO_MTSAFE_IO
/* Only initialise the lock when the caller has already attached one.  */
if (fp->_lock != NULL)
_IO_lock_init (*fp->_lock);
#endif
}

根据参数初始化_IO_wide_data与_IO_FILE_plus,多数置为NULL

0x3

_IO_file_init将结构体链接进_IO_list_all

在执行完_IO_no_init函数后,回到__fopen_internal函数,

函数将_IO_FILE_plus结构体的vtable设置成了_IO_file_jumps

然后调用_IO_new_file_init_internal将_IO_FILE_plus结构体链接进入_IO_list_all链表,跟进去函数,函数在/libio/fileops.c中:

1
2
3
4
5
6
7
8
9
10
11
12
/* Mark FP as a closed file buffer with an unknown offset, link it in via
   _IO_link_in, and set its descriptor to -1 (no file attached yet).  */
void
_IO_new_file_init_internal (struct _IO_FILE_plus *fp)
{
/* POSIX.1 allows another file handle to be used to change the position
of our file descriptor. Hence we actually don't know the actual
position before we do the first fseek (and until a following fflush). */
fp->file._offset = _IO_pos_BAD;
fp->file._flags |= CLOSED_FILEBUF_FLAGS;

_IO_link_in (fp);
fp->file._fileno = -1;
}

这个函数的主体就是调用了_IO_link_in函数,跟进去,函数在/libio/genops.c中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/* Push FP onto the front of the _IO_list_all chain unless its _IO_LINKED
   flag is already set.  With _IO_MTSAFE_IO the insertion is bracketed by
   list_all_lock and the stream's own lock.  */
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
if ((fp->file._flags & _IO_LINKED) == 0)
{
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
/* The old list head becomes this stream's successor.  */
fp->file._chain = (FILE *) _IO_list_all;
_IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}

_IO_link_in函数的功能是检查FILE结构体是否包含_IO_LINKED标志,如果不包含则表示这个结构体没有链接进入_IO_list_all,则再后面把它链接进入_IO_list_all链表,同时设置FILE结构体的_chain字段为之前的链表的值,否则直接返回。

所以_IO_file_init主要功能是将FILE结构体链接进入_IO_list_all链表,在没执行_IO_file_init函数前_IO_list_all指向的是stderr结构体

0x4_IO_new_file_fopen

1
_IO_file_fopen ((FILE *) new_f, filename, mode, is32)

注意:上面的函数调用过程中将 new_f 指针从 locked_FILE 转为了 FILE,这样做是合法的,因为 locked_FILE 中第一个变量_IO_FILE_plus 的首个变量即是 FILE,实际上这样做使得 new_f 指针的访问被截断,只能访问前面 FILE 中的内容。

这里做了符号映射,实际调用_IO_file_fopen 被映射为_IO_new_file_fopen

1
versioned_symbol (libc, _IO_new_file_fopen, _IO_file_fopen, GLIBC_2_1);
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/* Open FILENAME on the already-initialised stream FP according to MODE.
   The first character of MODE must be 'r', 'w' or 'a'; up to six further
   characters are scanned for the modifiers '+', 'x', 'b', 'm', 'c', 'e'.
   IS32NOT64 is forwarded to _IO_file_open.
   Returns the value of _IO_file_open on success.  Returns 0/NULL when FP is
   already open, when MODE starts with an unknown character (errno EINVAL),
   when the open fails, or when a ",ccs=" conversion cannot be set up.  */
FILE *
_IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
int is32not64)
{
int oflags = 0, omode;
int read_write;
int oprot = 0666;
int i;
FILE *result;
const char *cs;
const char *last_recognized;

if (_IO_file_is_open (fp))// the stream already has a file attached
return 0;
switch (*mode)// dispatch on the first character of the mode string
{
case 'r':
omode = O_RDONLY;
read_write = _IO_NO_WRITES;
break;
case 'w':
omode = O_WRONLY;
oflags = O_CREAT|O_TRUNC;
read_write = _IO_NO_READS;
break;
case 'a':
omode = O_WRONLY;
oflags = O_CREAT|O_APPEND;
read_write = _IO_NO_READS|_IO_IS_APPENDING;
break;
default:
__set_errno (EINVAL);
return NULL;
}
last_recognized = mode;
/* Scan at most six modifier characters; a NUL ends the scan early.  */
for (i = 1; i < 7; ++i)
{
switch (*++mode)
{
case '\0':
break;
case '+':
omode = O_RDWR;
/* Drop the no-read/no-write restriction, keep only the append bit.  */
read_write &= _IO_IS_APPENDING;
last_recognized = mode;
continue;
case 'x':
oflags |= O_EXCL;
last_recognized = mode;
continue;
case 'b':
last_recognized = mode;
continue;
case 'm':
fp->_flags2 |= _IO_FLAGS2_MMAP;
continue;
case 'c':
fp->_flags2 |= _IO_FLAGS2_NOTCANCEL;
continue;
case 'e':
oflags |= O_CLOEXEC;
fp->_flags2 |= _IO_FLAGS2_CLOEXEC;
continue;
default:
/* Ignore. */
continue;
}
/* Only the '\0' case reaches this point; leave the loop.  */
break;
}

result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
is32not64);

if (result != NULL)
{
/* Test whether the mode string specifies the conversion. */
cs = strstr (last_recognized + 1, ",ccs=");
if (cs != NULL)
{
/* Yep. Load the appropriate conversions and set the orientation
to wide. */
struct gconv_fcts fcts;
struct _IO_codecvt *cc;
/* The charset name runs from just after ",ccs=" (5 chars) to the next
   ',' or the end of the string.  */
char *endp = __strchrnul (cs + 5, ',');
char *ccs = malloc (endp - (cs + 5) + 3);

if (ccs == NULL)
{
int malloc_err = errno; /* Whatever malloc failed with. */
(void) _IO_file_close_it (fp);
__set_errno (malloc_err);
return NULL;
}

*((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0';
strip (ccs, ccs);

if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0'
? upstr (ccs, cs + 5) : ccs) != 0)
{
/* Something went wrong, we cannot load the conversion modules.
This means we cannot proceed since the user explicitly asked
for these. */
(void) _IO_file_close_it (fp);
free (ccs);
__set_errno (EINVAL);
return NULL;
}

free (ccs);

assert (fcts.towc_nsteps == 1);
assert (fcts.tomb_nsteps == 1);

fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;

/* Clear the state. We start all over again. */
memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));

cc = fp->_codecvt = &fp->_wide_data->_codecvt;

cc->__cd_in.step = fcts.towc;

cc->__cd_in.step_data.__invocation_counter = 0;
cc->__cd_in.step_data.__internal_use = 1;
cc->__cd_in.step_data.__flags = __GCONV_IS_LAST;
cc->__cd_in.step_data.__statep = &result->_wide_data->_IO_state;

cc->__cd_out.step = fcts.tomb;

cc->__cd_out.step_data.__invocation_counter = 0;
cc->__cd_out.step_data.__internal_use = 1;
cc->__cd_out.step_data.__flags = __GCONV_IS_LAST | __GCONV_TRANSLIT;
cc->__cd_out.step_data.__statep = &result->_wide_data->_IO_state;

/* From now on use the wide character callback functions. */
_IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;

/* Set the mode now. */
result->_mode = 1;
}
}

return result;
}

1.入参及局部变量准备

不多赘述,准备与 fopen 相关的变量

1
2
3
4
5
6
7
8
9
10
11
210 FILE *
211 _IO_new_file_fopen (FILE *fp, const char *filename, const char *mode,
212 int is32not64)
213 {
214 int oflags = 0, omode;
215 int read_write;
216 int oprot = 0666;
217 int i;
218 FILE *result;
219 const char *cs;
220 const char *last_recognized;

2.如果文件已经打开,则返回 0

判断方式也很简单,查看 fp 的_fileno 是否被赋值,正常打开一次之后,该值将被赋值为对应的 fd

1
2
3
4
222   if (_IO_file_is_open (fp))
223 return 0;

565 #define _IO_file_is_open(__fp) ((__fp)->_fileno != -1)

3.解析文件打开的 mode

omode 记录 File access modes:只读/只写/读写

oflags 记录文件 open 的参数:

  • O_CREAT:Create file if it doesn’t exist
  • O_TRUNC:Truncate file to zero length
  • O_APPEND:Writes append to the file
  • O_EXCL:Fail if file already exists
  • O_CLOEXEC:Set close_on_exec

read_write 记录读写参数:

  • _IO_NO_READS:Reading not allowed
  • _IO_NO_WRITES:Writing not allowed
  • _IO_IS_APPENDING:追加模式

fp->_flags2 记录第二个 flags 信息:

  • _IO_FLAGS2_MMAP:使用 mmap
  • _IO_FLAGS2_NOTCANCEL:不取消模式
  • _IO_FLAGS2_CLOEXEC:close_on_exec

last_recognized 记录最后检测到的模式。

4.调用_IO_file_open 打开文件

注意,这里大部分参数都是传入的,或者刚解析出来的,这个 oprot 是前文定义的局部变量int oprot = 0666,表示

  • 该文件拥有者对该文件拥有读写的权限但是没有执行的权限

  • 该文件拥有者所在组的其他成员对该文件拥有读写的权限但是没有执行的权限

  • 其他用户组的成员对该文件也拥有读写权限但是没有执行的权限

调用_IO_file_open 的流程中大致可以分为如下几步:

  • 根据 flags2 决定是调用__open_nocancel 还是__open;

  • 调用_IO_mask_flags 设定对应的 flags;

  • 针对 append 模式,移动文件指针到_IO_seek_end;
  • 将打开后的 fp link 到_IO_list_all 上
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
280   result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
281 is32not64);

179 FILE *
180 _IO_file_open (FILE *fp, const char *filename, int posix_mode, int prot,
181 int read_write, int is32not64)
182 {
183 int fdesc;
184 if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
185 fdesc = __open_nocancel (filename,
186 posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
187 else
188 fdesc = __open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
189 if (fdesc < 0)
190 return NULL;
191 fp->_fileno = fdesc;
192 _IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
193 /* For append mode, send the file offset to the end of the file. Don't
194 update the offset cache though, since the file handle is not active. */
195 if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
196 == (_IO_IS_APPENDING | _IO_NO_READS))
197 {
198 off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
199 if (new_pos == _IO_pos_BAD && errno != ESPIPE)
200 {
201 __close_nocancel (fdesc);
202 return NULL;
203 }
204 }
205 _IO_link_in ((struct _IO_FILE_plus *) fp);
206 return fp;
207 }

函数的主要功能就是执行系统调用open打开文件并将文件描述符赋值给FILE结构体的_fileno字段

最后再次调用_IO_link_in函数,确保该结构体被链接进入_IO_list_all链表。

5.查看打开的文件是否需要特殊转换

这里主要是针对宽字符进行相关的处理和模式设置,详细的内容就不赘述了,具体细节与正常的打开流程基本一致,最后设置宽字符的字符处理虚函数表_wide_vtable。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
if (result != NULL)
{
/* Test whether the mode string specifies the conversion. */
cs = strstr (last_recognized + 1, ",ccs=");
if (cs != NULL)
{
/* Yep. Load the appropriate conversions and set the orientation
to wide. */
struct gconv_fcts fcts;
struct _IO_codecvt *cc;
char *endp = __strchrnul (cs + 5, ',');
char *ccs = malloc (endp - (cs + 5) + 3);

if (ccs == NULL)
{
int malloc_err = errno; /* Whatever malloc failed with. */
(void) _IO_file_close_it (fp);
__set_errno (malloc_err);
return NULL;
}

*((char *) __mempcpy (ccs, cs + 5, endp - (cs + 5))) = '\0';
strip (ccs, ccs);

if (__wcsmbs_named_conv (&fcts, ccs[2] == '\0'
? upstr (ccs, cs + 5) : ccs) != 0)
{
/* Something went wrong, we cannot load the conversion modules.
This means we cannot proceed since the user explicitly asked
for these. */
(void) _IO_file_close_it (fp);
free (ccs);
__set_errno (EINVAL);
return NULL;
}

free (ccs);

assert (fcts.towc_nsteps == 1);
assert (fcts.tomb_nsteps == 1);

fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;

/* Clear the state. We start all over again. */
memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));

cc = fp->_codecvt = &fp->_wide_data->_codecvt;

cc->__cd_in.step = fcts.towc;

cc->__cd_in.step_data.__invocation_counter = 0;
cc->__cd_in.step_data.__internal_use = 1;
cc->__cd_in.step_data.__flags = __GCONV_IS_LAST;
cc->__cd_in.step_data.__statep = &result->_wide_data->_IO_state;

cc->__cd_out.step = fcts.tomb;

cc->__cd_out.step_data.__invocation_counter = 0;
cc->__cd_out.step_data.__internal_use = 1;
cc->__cd_out.step_data.__flags = __GCONV_IS_LAST | __GCONV_TRANSLIT;
cc->__cd_out.step_data.__statep = &result->_wide_data->_IO_state;

/* From now on use the wide character callback functions. */
_IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;

/* Set the mode now. */
result->_mode = 1;
}
}

0x6__fopen_maybe_mmap 函数

针对 flags2 为 mmap 且 flags 设定为”r”的模式,可以直接使用 mmap 内容的方式,因为不需要修改原文件内容,所以需要替换 fp 中字符操作的虚函数表,使用 maybe_mmap 类型的函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
33 FILE *
34 __fopen_maybe_mmap (FILE *fp)
35 {
36 #if _G_HAVE_MMAP
37 if ((fp->_flags2 & _IO_FLAGS2_MMAP) && (fp->_flags & _IO_NO_WRITES))
38 {
39 /* Since this is read-only, we might be able to mmap the contents
40 directly. We delay the decision until the first read attempt by
41 giving it a jump table containing functions that choose mmap or
42 vanilla file operations and reset the jump table accordingly. */
43
44 if (fp->_mode <= 0)
45 _IO_JUMPS_FILE_plus (fp) = &_IO_file_jumps_maybe_mmap;
46 else
47 _IO_JUMPS_FILE_plus (fp) = &_IO_wfile_jumps_maybe_mmap;
48 fp->_wide_data->_wide_vtable = &_IO_wfile_jumps_maybe_mmap;
49 }
50 #endif
51 return fp;
52 }

如果_IO_file_fopen打开文件失败(返回NULL)

1
2
3
_IO_un_link (&new_f->fp);
free (new_f);
return NULL;

取消链接并且free chunk返回NULL

fread

描述fread读取文件流的主要流程以及函数对IO FILE结构体以及结构体中的vtable的操作

这篇文章则是说在创建了文件FILE以后,fread如何实现从文件中读取数据的。

参考下流程图:

整体流程为:fread调用_IO_sgetn,_IO_sgetn调用vtable中的_IO_XSGETN,也就是_IO_file_xsgetn。_IO_file_xsgetn是fread实现的核心函数,它的流程简单总结为:

  1. 判断fp->_IO_buf_base输入缓冲区是否为空,如果为空则调用的_IO_doallocbuf去初始化输入缓冲区。
  2. 在分配完输入缓冲区或输入缓冲区不为空的情况下,判断输入缓冲区是否存在数据。
  3. 如果输入缓冲区有数据则直接拷贝至用户缓冲区,如果没有或不够则调用__underflow函数执行系统调用读取数据到输入缓冲区,再拷贝到用户缓冲区。

fread实际上是_IO_fread函数,文件目录为/libio/iofread.c

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Read COUNT items of SIZE bytes each from FP into BUF.
   Returns 0 when SIZE * COUNT is 0.  Otherwise returns COUNT when every
   requested byte was read, or the number of bytes read divided by SIZE.  */
size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);// the actual read is delegated to _IO_sgetn
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}

核心是_IO_sgetn,在libio/genops.c

1
2
3
4
5
6
/* Forward a read of N bytes into DATA to the _IO_XSGETN operation of FP and
   return its result.  */
size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}

_IO_XSGETN是一个宏

1
2
3
4
5
6
7
8
9
10
11
/* _IO_XSGETN (FP, DATA, N) calls the __xsgetn entry of FP's vtable with
   (FP, DATA, N).  */
#define _IO_XSGETN(FP,DATA,N) JUMP2 (__xsgetn, FP, DATA, N)
/* Call vtable entry FUNC of THIS with two extra arguments.  */
#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
/* Fetch THIS's vtable pointer and pass it through IO_validate_vtable.  */
# define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS)))
/* Access the vtable member of the struct _IO_FILE_plus that THIS points to.  */
#define _IO_JUMPS_FILE_plus(THIS) \
_IO_CAST_FIELD_ACCESS ((THIS), struct _IO_FILE_plus, vtable)
/* Lvalue for MEMBER of the TYPE object at address THIS, reached by adding
   offsetof (TYPE, MEMBER) to the byte address.  The continuation line must
   hold only the addition; a leftover "......" placeholder from the page
   extraction made the expansion invalid.  */
#define _IO_CAST_FIELD_ACCESS(THIS, TYPE, MEMBER) \
(*(_IO_MEMBER_TYPE (TYPE, MEMBER) *)(((char *) (THIS)) \
+ offsetof(TYPE, MEMBER)))
Expands to:

((IO_validate_vtable ((*(__typeof__ (((struct _IO_FILE_plus){}).vtable) *)(((char *) ((fp))) + __builtin_offsetof (struct _IO_FILE_plus, vtable)))))->__xsgetn) (fp, data, n)

实际上就是FILE结构体中vtable的__xsgetn函数,跟进去/libio/fileops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/* Read up to N bytes from FP into DATA and return the number of bytes
   actually delivered (N minus what is still wanted at exit).
   Data is taken from the stream buffer when it holds enough; otherwise the
   buffer is refilled through __underflow, or _IO_SYSREAD reads straight
   into DATA.  */
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
{
size_t want, have;
ssize_t count;
char *s = data;

want = n;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}// Part 1: no buffer yet (_IO_buf_base is NULL), so call _IO_doallocbuf
_IO_doallocbuf (fp);
}

while (want > 0)// note: everything below repeats until the request is satisfied or a break is hit
{
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)// Part 2: the buffer already holds enough bytes; copy them straight to the destination
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;// only `want` bytes are consumed; anything beyond stays in the buffer for the next read
want = 0;
}
else
{
if (have > 0)// Part 2: the buffer holds some bytes but fewer than requested; copy what is there first
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;// now _IO_read_ptr == _IO_read_end
}
// s is advanced by the assignment above: the mempcpy macro evaluates to
// the destination plus the byte count, so later copies do not overwrite earlier ones.

/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}

/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))// the remaining request is smaller than the buffer
{
if (__underflow (fp) == EOF) // Part 3: the buffer cannot satisfy the request; __underflow refills it (only when want < buffer size)
break;

continue;// __underflow did not return EOF; loop again to copy from the buffer
}
// Requests smaller than the buffer never get past this point.
// The code below runs only when _IO_buf_base is NULL or want is at least
// the buffer size; it reads directly into the caller's buffer.
---------------------------------------------------------------------------------
/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}

return n - want;// number of bytes delivered to the caller
}

_IO_file_xsgetn是处理fread读入数据的核心函数,分为三个部分:

  • 第一部分是fp->_IO_buf_base为空的情况,表明此时的FILE结构体中的指针未被初始化,输入缓冲区未建立,则调用_IO_doallocbuf去初始化指针,建立输入缓冲区。
  • 第二部分是输入缓冲区里有输入,即fp->_IO_read_ptr小于fp->_IO_read_end,此时将缓冲区里的数据直接拷贝至目标buff。
  • 第三部分是输入缓冲区里的数据为空或者是不能满足全部的需求,则调用__underflow调用系统调用读入数据。

接下来对_IO_file_xsgetn这三部分进行跟进并分析。

注1:

1
2
/* __mempcpy is an alias for mempcpy, which copies N bytes with memcpy and
   evaluates to the address just past the last byte written (D + N).  */
#define __mempcpy mempcpy
#define mempcpy(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))

因此s = __mempcpy (s, fp->_IO_read_ptr, have);语句会自动更新s的值

0x1

初始化输入缓冲区

fp->_IO_buf_base为空时,也就是输入缓冲区未建立时,代码调用_IO_doallocbuf函数去建立输入缓冲区。跟进_IO_doallocbuf函数,看下它是如何初始化输入缓冲区,为输入缓冲区分配空间的,文件在/libio/genops.c中:

1
2
3
4
5
6
7
8
9
10
/* Make sure FP has a buffer.  Does nothing when one is already set.
   Otherwise tries _IO_DOALLOCATE (unless the stream is unbuffered and
   _mode <= 0) and, when that is skipped or returns EOF, falls back to the
   one-byte _shortbuf inside FP.  */
void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)// a buffer is already present; nothing to do
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)// stream is not marked unbuffered, or _mode > 0
if (_IO_DOALLOCATE (fp) != EOF)// allocation is delegated to the vtable's doallocate entry
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}

函数先检查fp->_IO_buf_base是否为空,如果不为空的话表明该输入缓冲区已被初始化,直接返回。如果为空,则检查fp->_flags是否未设置_IO_UNBUFFERED,或者fp->_mode是否大于0,满足其一则调用vtable中的_IO_DOALLOCATE,也就是_IO_file_doallocate,跟进去该函数,在/libio/filedoalloc.c中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* Allocate a buffer for FP with malloc and install it via _IO_setb.
   The size starts at BUFSIZ and, where _STATBUF_ST_BLKSIZE is defined, is
   lowered to st_blksize when that is positive and smaller than BUFSIZ.
   Character devices that look like a tty are switched to line buffering.
   Returns 1 on success and EOF when malloc fails.  */
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct stat64 st;

size = BUFSIZ;
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0) // query the file's stat data through _IO_SYSSTAT
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;
}
#if defined _STATBUF_ST_BLKSIZE
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);
if (__glibc_unlikely (p == NULL))
return EOF;
_IO_setb (fp, p, p + size, 1);// install [p, p + size) as the stream buffer
return 1;
}

可以看到_IO_file_doallocate函数是分配输入缓冲区的实现函数,首先调用_IO_SYSSTAT去获取文件信息,_IO_SYSSTAT函数是vtable中的__stat函数,获取文件信息,修改相应需要申请的size。

空间申请出来后,调用_IO_setb,跟进去看它干了些啥,文件在/libio/genops.c中:

1
2
3
4
5
6
7
8
9
10
11
12
/* Install [B, EB) as the buffer of F.
   A previous buffer is freed first unless _IO_USER_BUF is set on the stream.
   A non-zero A clears _IO_USER_BUF; a zero A sets it.  */
void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}

函数相对比较简单,就是设置了_IO_buf_base和_IO_buf_end,可以预料到_IO_setb函数执行完后,fp的这两个指针被赋上值了

到此,初始化缓冲区就完成了,_IO_setb返回到_IO_file_doallocate后,_IO_file_doallocate返回到_IO_doallocbuf,最后_IO_doallocbuf返回到_IO_file_xsgetn函数中。

0x2

拷贝输入缓冲区数据

如果输入缓冲区里存在已输入的数据,则把它直接拷贝到目标缓冲区里。

这部分比较简单,需要说明下,fp->_IO_read_base指向的是输入缓冲区的起始地址,fp->_IO_read_end指向的是缓冲区中已读入数据的结束地址,fp->_IO_read_ptr指向下一个还未被取走的字节。

fp->_IO_read_end-fp->_IO_read_ptr之间的数据通过memcpy拷贝到目标缓冲区里。

0x3

执行系统调用读取数据

在输入缓冲区为空或者是不能满足需求的时候则会执行最后一步__underflow去执行系统调用read读取数据,并放入到输入缓冲区里。

进入到__underflow,文件在/libio/genops.c中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* Generic refill entry point for a byte stream.
   Returns the byte at _IO_read_ptr (without advancing it) when data is
   already available, EOF when one of the preliminary checks fails, and
   otherwise the result of the stream's _IO_UNDERFLOW operation.  */
int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;

if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
/* Data already buffered: hand back the next byte.  */
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}

函数稍微做一些检查就会调用_IO_UNDERFLOW函数,其中一个检查是如果fp->_IO_read_ptr小于fp->_IO_read_end则表明输入缓冲区里存在数据,可直接返回,否则则表示需要继续读入数据。

检查都通过的话就会调用_IO_UNDERFLOW函数,该函数是FILE结构体vtable里的_IO_new_file_underflow,跟进去看,文件在/libio/fileops.c里:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
/* Refill the buffer of FP with one _IO_SYSREAD call.
   Returns the first unread byte (without advancing _IO_read_ptr) on
   success.  Returns EOF when _IO_EOF_SEEN is already set, when reading is
   not allowed (_IO_NO_READS; also sets _IO_ERR_SEEN and errno EBADF), or
   when the read returns no data.  */
int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;

/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;

if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;

if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
free (fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}

/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);

if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);

_IO_release_lock (stdout);
}

_IO_switch_to_get_mode (fp);

/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;// every read and write pointer now sits at _IO_buf_base
// NOTE(review): the write pointers are presumably reset as well because reads and writes share this one buffer -- confirm
count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);// read at most one buffer's worth
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0;
}
fp->_IO_read_end += count;// extend read_end over the new data so the caller can take it from read_ptr
if (count == 0)
{
/* If a stream is read to EOF, the calling application may switch active
handles. As a result, our offset cache would no longer be valid, so
unset it. */
fp->_offset = _IO_pos_BAD;
return EOF;
}
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}

这个_IO_new_file_underflow函数,是最终调用系统调用的地方,在最终执行系统调用之前,仍然有一些检查,整个流程为:

  1. 检查FILE结构体的_flags标志位是否包含_IO_NO_READS,如果存在这个标志位则直接返回EOF,其中_IO_NO_READS标志位的定义是#define _IO_NO_READS 4 /* Reading not allowed */
  2. 如果fp->_IO_buf_base为null,则调用_IO_doallocbuf分配输入缓冲区。
  3. 接着初始化设置FILE结构体指针,将他们都设置成fp->_IO_buf_base
  4. 调用_IO_SYSREAD(vtable中的_IO_file_read函数),该函数最终执行系统调用read,读取文件数据,数据读入到fp->_IO_buf_base中,读入大小为输入缓冲区的大小fp->_IO_buf_end - fp->_IO_buf_base
  5. 设置输入缓冲区已有数据的size,即设置fp->_IO_read_end:fp->_IO_read_end += count

其中第二步里面的如果fp->_IO_buf_base为null,则调用_IO_doallocbuf分配输入缓冲区,似乎有点累赘,因为之前已经分配了,这个原因在最后会说明。

其中第四步的_IO_SYSREAD(vtable中的_IO_file_read函数)的源码比较简单,就是执行系统调用函数read去读取文件数据,文件在libio/fileops.c,源码如下:

1
2
3
4
5
6
7
/* Read up to SIZE bytes from the descriptor of FP into BUF and return the
   result of the read call.  __read_nocancel is used when
   _IO_FLAGS2_NOTCANCEL is set in _flags2, __read otherwise.  */
ssize_t
_IO_file_read (FILE *fp, void *buf, ssize_t size)
{
return (__builtin_expect (fp->_flags2 & _IO_FLAGS2_NOTCANCEL, 0)
? __read_nocancel (fp->_fileno, buf, size)
: __read (fp->_fileno, buf, size));
}

_IO_file_underflow函数执行完毕以后,FILE结构体中各个指针已被赋值,且文件数据已读入,输入缓冲区里已经有数据,

其中fp->_IO_read_ptr指向输入缓冲区数据的开始位置,fp->_IO_read_end指向输入缓冲区数据结束的位置:

函数执行完后,返回到_IO_file_xsgetn函数中,由于while循环的存在,重新执行第二部分,此时将输入缓冲区拷贝至目标缓冲区,最终返回。

至此,对于fread的源码分析结束。

0x4

如果fp->_IO_buf_base为空,或者剩余需求want不小于缓冲区大小,就不会走__underflow,而是执行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  /* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}

count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;

break;
}

s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}

采用不使用缓冲区的方式读取

scanf

1
2
3
4
5
read
_IO_new_file_underflow at fileops.c
__GI__IO_default_uflow at genops.c
_IO_vfscanf_internal at vfscanf.c
__isoc99_scanf at isoc99_scanf.c

栈回溯

可以看到scanf最终是调用stdin的vtable中的_IO_new_file_underflow去调用read的。

不过它并不是由_IO_file_xsgetn调用的,而是使用vtable中的__uflow,源码如下:

1
2
3
4
5
6
7
8
9
/* Fetch one byte from FP: call _IO_UNDERFLOW first, return EOF when it
   does, otherwise return the byte at _IO_read_ptr and advance the pointer
   by one.  */
int
_IO_default_uflow (_IO_FILE *fp)
{
int ch = _IO_UNDERFLOW (fp);
if (ch == EOF)
return EOF;
return *(unsigned char *) fp->_IO_read_ptr++;
}
libc_hidden_def (_IO_default_uflow)

__uflow函数首先直接调用_IO_new_file_underflow,因此最终也是_IO_new_file_underflow实现的输入。之后其只返回_IO_read_ptr处的一个字符

gets

1
2
3
4
read
__GI__IO_file_underflow
__GI__IO_default_uflow
gets

函数调用栈与scanf基本一致:

其它

虽然不能说全部的io输入都是通过_IO_new_file_underflow函数最终实现的输入,但是应该也可以说大部分是使用_IO_new_file_underflow函数实现的。

但是仍然有一个问题,由于__uflow直接就调用了_IO_new_file_underflow函数,那么输入缓冲区是在哪里建立的呢

为了找到这个问题的答案,在程序进入到fscanf函数后又在malloc函数下了个断点,然后栈回溯:

1
2
3
4
5
6
7
8
9
malloc
__GI__IO_file_doallocate
__GI__IO_doallocbuf
__GI__IO_file_underflow
__GI__IO_default_uflow
__GI__IO_vfscanf
__isoc99_fscanf
main
__libc_start_main

原来是在__GI__IO_file_underflow分配的空间,回到上面看该函数的源码,确实有一段判断输入缓冲区如果为空则调用__GI__IO_doallocbuf函数建立输入缓冲区的代码,这就解释了__GI__IO_file_underflow第二步中为啥还会有个输入缓冲区判断的原因了。

fwrite

有点复杂,写的乱了点,之后改

fwrite实际上是_IO_fwrite,位于libio/iofwrite.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* Write COUNT items of SIZE bytes each from BUF to FP.
   Returns 0 when SIZE * COUNT is 0.  Returns COUNT when _IO_sputn reports
   the whole request or EOF, otherwise the number of bytes written divided
   by SIZE.  */
size_t
_IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
size_t request = size * count;
size_t written = 0;
CHECK_FILE (fp, 0);
if (request == 0)
return 0;
_IO_acquire_lock (fp);
/* Nothing is written (written stays 0) when the vtable offset is 0 and
   _IO_fwide (fp, -1) does not return -1.  */
if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
written = _IO_sputn (fp, (const char *) buf, request);
_IO_release_lock (fp);
/* We have written all of the input in case the return value indicates
this or EOF is returned. The latter is a special case where we
simply did not manage to flush the buffer. But the data is in the
buffer and therefore written as far as fwrite is concerned. */
if (written == request || written == EOF)
return count;
else
return written / size;
}

_IO_sputn是一个宏最终调用的是vtable中的_IO_file_xsputn,直接搜是找不到这个函数的定义,因为其实际上已被_IO_new_file_xsputn替代

_IO_new_file_xsputn位于libio/fileops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* Write N bytes from DATA to the stream F.
   First copies as much as fits into the free space of the buffer, then
   flushes with _IO_OVERFLOW, writes whole blocks with new_do_write and
   hands any remainder to _IO_default_xsputn.
   Returns N minus the bytes left unwritten, 0 when N is 0, or EOF when the
   flush fails after everything had already been copied into the buffer.  */
size_t
_IO_new_file_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (const char *) data;
size_t to_do = n;
int must_flush = 0;
size_t count = 0;

if (n <= 0)
return 0;
/* This is an optimized implementation.
If the amount to be written straddles a block boundary
(or the filebuf is unbuffered), use sys_write directly. */

/* First figure out how much space is available in the buffer. */
if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
/* Scan backwards for the last newline; when found, only the bytes up to
   and including it are buffered and a flush is forced.  */
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

/* Then fill the buffer. */
if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}
/* Anything left over, or a forced flush, takes the slow path.  */
if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);

if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}

/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}
return n - to_do;
}

可以看到整体逻辑与fread几乎是一致的

0x1

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
{
count = f->_IO_buf_end - f->_IO_write_ptr;
if (count >= n)
{
const char *p;
for (p = s + n; p > s; )
{
if (*--p == '\n')
{
count = p - s + 1;
must_flush = 1;
break;
}
}
}
}
else if (f->_IO_write_end > f->_IO_write_ptr)
count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

如果文件流属于行缓冲模式

则倒序搜索数据串中是否存在’\n’

存在则设置count和must_flush标志

如果不处于行缓冲模式,则 count 取输出缓冲区中剩余的可用空间(_IO_write_end - _IO_write_ptr)

0x2

1
2
3
4
5
6
7
8
 if (count > 0)
{
if (count > to_do)
count = to_do;
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
to_do -= count;
}

如果count>0,就先把数据中最多count字节拷贝进文件流的输出缓冲区(_IO_write_ptr随之后移),并相应更新s和to_do

0x3

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  if (to_do + must_flush > 0)
{
size_t block_size, do_write;
/* Next flush the (full) buffer. */
if (_IO_OVERFLOW (f, EOF) == EOF)
/* If nothing else has to be written we must not signal the
caller that everything has been written. */
return to_do == 0 ? EOF : n - to_do;

/* Try to maintain alignment: write a whole number of blocks. */
block_size = f->_IO_buf_end - f->_IO_buf_base;
do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);
//清空缓冲区
if (do_write)
{
count = new_do_write (f, s, do_write);
to_do -= count;
if (count < do_write)
return n - to_do;
}

/* Now write out the remainder. Normally, this will fit in the
buffer, but it's somewhat messier for line-buffered files,
so we let _IO_default_xsputn handle the general case. */
if (to_do)
to_do -= _IO_default_xsputn (f, s+do_write, to_do);
}

如果 todo 还有剩余(即剩余空间不够)或 must_flush 被置为 1 的情况(即上面有 flush 的情况),需要做如下的处理:

  • 先调用_IO_OVERFLOW 将前面写满的 buffer 写入物理文件中,如果此时写入失败的话,那就需要做处理,如果 to_do == 0,即本次要写入的东西都写到缓冲 buffer 里面了,所以是写入失败的,需要返回 EOF,否则,说明 n - todo 字节的 buffer 被写入缓冲了。
  • 计算当前文件流对象的 buffer 大小 block_size(即_IO_buf_end-_IO_buf_base),如果 block_size 大于等于 128,则计算剩余未写入字节的余数 to_do % block_size,否则置为 0,计算 do_write 为剩余字节数减去上面计算出的对齐余数。所以作用是将剩余的未写入字节数规整为 m*block_size + 剩余未满 block_size 字节的部分。
  • 调用 new_do_write 写入上面计算出的一整块数据(这些数据大小是 m 个 buffer 缓冲区大小),注意,这里返回的实际写入字节数 count 如果小于我们前面计算的 do_write 大小,那就直接返回已写入的字节数 n - to_do(说明有写入失败的情况存在)。
  • 最后,如果还有字节没有写入,那就需要调用_IO_default_xsputn 进行剩余字节的写入。
  • 最后的返回信息仍然是 n - to_do 字节

overflow

先看_IO_OVERFLOW (f, EOF)

它是一个调用 vtable 中 __overflow 项的宏,对普通文件流而言最终执行的是_IO_new_file_overflow

代码位于libio/fileops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/* Overflow handler for the stream F.
   Puts the stream into write mode first if necessary (allocating a buffer
   when none exists).  With CH == EOF it flushes the pending bytes between
   _IO_write_base and _IO_write_ptr through _IO_do_write and returns that
   result; otherwise it stores CH in the buffer, flushing beforehand when
   the buffer is full and afterwards for unbuffered streams or a '\n' on a
   line-buffered stream.
   Returns EOF on failure, (unsigned char) CH after storing a character. */
int
_IO_new_file_overflow (FILE *f, int ch)
{
if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
{
f->_flags |= _IO_ERR_SEEN;// the stream does not allow writing
__set_errno (EBADF);
return EOF;
}
/* If currently reading or no buffer allocated. */
if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
{
/* Allocate a buffer if needed. */
if (f->_IO_write_base == NULL)
{
_IO_doallocbuf (f);
_IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
}
/* Otherwise must be currently reading.
If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
logically slide the buffer forwards one block (by setting the
read pointers to all point at the beginning of the block). This
makes room for subsequent output.
Otherwise, set the read pointers to _IO_read_end (leaving that
alone, so it can continue to correspond to the external position). */
if (__glibc_unlikely (_IO_in_backup (f)))
{
size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
_IO_free_backup_area (f);
f->_IO_read_base -= MIN (nbackup,f->_IO_read_base - f->_IO_buf_base);
f->_IO_read_ptr = f->_IO_read_base;
}

if (f->_IO_read_ptr == f->_IO_buf_end)
f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
/* The write area starts where reading stopped and runs to the buffer end. */
f->_IO_write_ptr = f->_IO_read_ptr;
f->_IO_write_base = f->_IO_write_ptr;
f->_IO_write_end = f->_IO_buf_end;
f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;

f->_flags |= _IO_CURRENTLY_PUTTING;// switch the stream to write mode
/* Line-buffered and unbuffered streams get an empty write window
   (_IO_write_end == _IO_write_ptr). */
if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
f->_IO_write_end = f->_IO_write_ptr;
}
if (ch == EOF)
return _IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base);
if (f->_IO_write_ptr == f->_IO_buf_end ) /* Buffer is really full */
if (_IO_do_flush (f) == EOF)
return EOF;
*f->_IO_write_ptr++ = ch;
if ((f->_flags & _IO_UNBUFFERED)
|| ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
if (_IO_do_write (f, f->_IO_write_base,
f->_IO_write_ptr - f->_IO_write_base) == EOF)
return EOF;
return (unsigned char) ch;
}

其又调用_IO_do_write;_IO_do_write内部经new_do_write(见下文0x31)通过_IO_SYSWRITE最终调用到vtable中的__write项,即下面的_IO_new_file_write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Write N bytes from DATA to the file descriptor of F, repeating the write
   call until everything is written or a call fails.
   On failure _IO_ERR_SEEN is set and the loop stops.  Returns the number of
   bytes actually written and advances f->_offset by that amount when the
   offset is known (>= 0). */
ssize_t
_IO_new_file_write (FILE *f, const void *data, ssize_t n)
{
ssize_t to_do = n;
while (to_do > 0)
{
/* Use the non-cancellable write when _IO_FLAGS2_NOTCANCEL is set. */
ssize_t count = (__builtin_expect (f->_flags2
& _IO_FLAGS2_NOTCANCEL, 0)
? __write_nocancel (f->_fileno, data, to_do)
: __write (f->_fileno, data, to_do));
if (count < 0)
{
f->_flags |= _IO_ERR_SEEN;
break;
}
to_do -= count;
data = (void *) ((char *) data + count);// advance the source pointer past the bytes just written
}
n -= to_do; /* n now holds the number of bytes written */
if (f->_offset >= 0)
f->_offset += n;
return n;
}

如果buf全满了

又会使用_IO_do_flush,相当于调用_IO_do_write

1
2
3
4
5
6
7
/* Flush the pending output of stream _f: for _mode <= 0 pass the bytes
   between _IO_write_base and _IO_write_ptr to _IO_do_write, otherwise pass
   the corresponding range of _wide_data to _IO_wdo_write. */
#define _IO_do_flush(_f) \
((_f)->_mode <= 0 \
? _IO_do_write(_f, (_f)->_IO_write_base, \
(_f)->_IO_write_ptr-(_f)->_IO_write_base) \
: _IO_wdo_write(_f, (_f)->_wide_data->_IO_write_base, \
((_f)->_wide_data->_IO_write_ptr \
- (_f)->_wide_data->_IO_write_base)))

0x31

new_do_write

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/* Write TO_DO bytes from DATA to FP through _IO_SYSWRITE.
   Before writing, the file offset is either marked unknown (append mode)
   or moved with _IO_SYSSEEK when _IO_read_end and _IO_write_base differ.
   After writing, the read pointers and write pointers are reset to the
   start of the buffer.
   Returns the count reported by _IO_SYSWRITE, or 0 if the seek fails. */
static size_t
new_do_write (FILE *fp, const char *data, size_t to_do)
{
size_t count;
if (fp->_flags & _IO_IS_APPENDING)
/* On a system without a proper O_APPEND implementation,
you would need to sys_seek(0, SEEK_END) here, but is
not needed nor desirable for Unix- or Posix-like systems.
Instead, just indicate that offset (before and after) is
unpredictable. */
fp->_offset = _IO_pos_BAD;
else if (fp->_IO_read_end != fp->_IO_write_base)
{
/* Seek relative to the current position (whence 1) by the distance
   between _IO_read_end and _IO_write_base. */
off64_t new_pos
= _IO_SYSSEEK (fp, fp->_IO_write_base - fp->_IO_read_end, 1);
if (new_pos == _IO_pos_BAD)
return 0;
fp->_offset = new_pos;
}
count = _IO_SYSWRITE (fp, data, to_do);
if (fp->_cur_column && count)
fp->_cur_column = _IO_adjust_column (fp->_cur_column - 1, data, count) + 1;
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_buf_base;
/* Line-buffered and unbuffered narrow streams get an empty write window;
   all others may fill the whole buffer. */
fp->_IO_write_end = (fp->_mode <= 0
&& (fp->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
? fp->_IO_buf_base : fp->_IO_buf_end);
return count;
}

0x4_IO_default_xsputn

  • 处理局部变量赋值,同时考虑写入 size 小于等于 0 的情况,直接返回 0

  • 开始循环处理 data 数据

    如果还有剩余缓存空间,计算剩余缓存空间数量 count

    如果缓存空间比要写入的字节数量多,那就更新 count 为需要写入字节数;

    如果需要写入字节数大于 20,那就调用__mempcpy 写入

    否则就使用循环赋值的方式进行赋值(注意这里就是 Glibc 的精髓所在了,正常我们写代码可能就考虑循环赋值或者 memcpy 解决这个问题了,但是这里区分了情况,应该是考虑到了两者的性能差,为了达到最优情况,使用了分段处理的方式)

  • 循环结束条件是剩余写入字符为 0,或调用_IO_OVERFLOW(刷新 buffer 的同时写入下一个字符)失败返回 EOF;若_IO_OVERFLOW 成功,则 more 减 1 后继续循环

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/* Generic xsputn: write N bytes from DATA to F.
   Each iteration copies as much as fits into the free space between
   _IO_write_ptr and _IO_write_end, then pushes one more byte through
   _IO_OVERFLOW to make room.  The loop ends when nothing is left or
   _IO_OVERFLOW returns EOF.  Returns the number of bytes written. */
size_t
_IO_default_xsputn (FILE *f, const void *data, size_t n)
{
const char *s = (char *) data;
size_t more = n; /* bytes still to be written */
if (more <= 0)
return 0;
for (;;)
{
/* Space available. */
if (f->_IO_write_ptr < f->_IO_write_end)
{
size_t count = f->_IO_write_end - f->_IO_write_ptr;
if (count > more)
count = more;
/* More than 20 bytes are copied with __mempcpy, shorter runs with a
   plain byte loop. */
if (count > 20)
{
f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
s += count;
}
else if (count)
{
char *p = f->_IO_write_ptr;
ssize_t i;
for (i = count; --i >= 0; )
*p++ = *s++;
f->_IO_write_ptr = p;
}
more -= count;
}
if (more == 0 || _IO_OVERFLOW (f, (unsigned char) *s++) == EOF)
break;
more--; /* account for the byte consumed by _IO_OVERFLOW */
}
return n - more;
}
libc_hidden_def (_IO_default_xsputn)

printf

调用栈

1
2
3
4
5
6
► f 0   0x7f6117fd43b0 write
f 1 0x7f6117f55c0f _IO_file_write+143
f 2 0x7f6117f5639a _IO_file_xsputn+426
f 3 0x7f6117f2cfa4 buffered_vfprintf+308
f 4 0x7f6117f2a33d vfprintf+445
f 5 0x7f6117f328a9 printf+153

puts

调用栈与fwrite大致相同

fclose

fclose实际上是_IO_new_fclose,位于libio/iofclose.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/* Close the stream FP.
   Order of operations: unlink a file stream with _IO_un_link, close it
   under the stream lock with _IO_file_close_it, run _IO_FINISH, release
   the conversion steps of a wide-oriented stream or free the backup area
   of a narrow one, and finally call _IO_deallocate_file.
   Returns the status from _IO_file_close_it for file streams; for other
   streams -1 if _IO_ERR_SEEN is set and 0 otherwise. */
int
_IO_new_fclose (FILE *fp)
{
int status;

CHECK_FILE(fp, EOF);

#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
/* We desperately try to help programs which are using streams in a
strange way and mix old and new functions. Detect old streams
here. */
if (_IO_vtable_offset (fp) != 0)
return _IO_old_fclose (fp);
#endif

/* First unlink the stream. */
if (fp->_flags & _IO_IS_FILEBUF)
_IO_un_link ((struct _IO_FILE_plus *) fp);

_IO_acquire_lock (fp);
if (fp->_flags & _IO_IS_FILEBUF)
status = _IO_file_close_it (fp);
else
status = fp->_flags & _IO_ERR_SEEN ? -1 : 0;
_IO_release_lock (fp);
_IO_FINISH (fp);
if (fp->_mode > 0)
{
/* This stream has a wide orientation. This means we have to free
the conversion functions. */
struct _IO_codecvt *cc = fp->_codecvt;

__libc_lock_lock (__gconv_lock);
__gconv_release_step (cc->__cd_in.step);
__gconv_release_step (cc->__cd_out.step);
__libc_lock_unlock (__gconv_lock);
}
else
{
if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
}
_IO_deallocate_file (fp);
return status;
}

0x1

首先调用_IO_un_link将_IO_FILE从_IO_list_all解除,文件位于libio/genops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Remove FP from the singly linked list headed by _IO_list_all.
   Does nothing unless _IO_LINKED is set in the stream flags; clears that
   flag afterwards.  With _IO_MTSAFE_IO the list lock and the stream lock
   are held while the list is modified. */
void
_IO_un_link (struct _IO_FILE_plus *fp)
{
if (fp->file._flags & _IO_LINKED)
{
FILE **f;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (FILE *) fp;
_IO_flockfile ((FILE *) fp);
#endif
if (_IO_list_all == NULL)
;
else if (fp == _IO_list_all)
/* FP is the list head: the head simply moves to the next stream. */
_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
else
/* Walk the _chain links and splice FP out where it is found. */
for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
if (*f == (FILE *) fp)
{
*f = fp->file._chain;
break;
}
fp->file._flags &= ~_IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_funlockfile ((FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}

遍历寻找当前_IO_FILE将其解链

并进行其他一些参数设置

0x2

再往后便是_IO_FINISH,其也是一个调用vtable中函数的宏

其调用的是_IO_new_file_finish,位于libio/fileops.c

1
2
3
4
5
6
7
8
9
10
11
/* Finish hook for the stream FP: if the file is still open, flush pending
   output and close it through _IO_SYSCLOSE unless _IO_DELETE_DONT_CLOSE is
   set; then run _IO_default_finish.  DUMMY is not used. */
void
_IO_new_file_finish (FILE *fp, int dummy)
{
if (_IO_file_is_open (fp))
{
_IO_do_flush (fp);
if (!(fp->_flags & _IO_DELETE_DONT_CLOSE))
_IO_SYSCLOSE (fp);
}
_IO_default_finish (fp, 0);
}

_IO_do_flush会将_IO_FILE中的缓存输出

之后再调用系统调用close关闭文件流

再运行_IO_default_finish

位于libio/genops.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/* Release the resources attached to FP: free the stream buffer unless it
   is flagged _IO_USER_BUF, detach all markers, free the save area, unlink
   the stream from the global list and, with _IO_MTSAFE_IO, finalize its
   lock.  DUMMY is not used. */
void
_IO_default_finish (FILE *fp, int dummy)
{
struct _IO_marker *mark;
if (fp->_IO_buf_base && !(fp->_flags & _IO_USER_BUF))
{
free (fp->_IO_buf_base);
fp->_IO_buf_base = fp->_IO_buf_end = NULL;
}

/* Markers must no longer point back at this stream. */
for (mark = fp->_markers; mark != NULL; mark = mark->_next)
mark->_sbuf = NULL;

if (fp->_IO_save_base)
{
free (fp->_IO_save_base);
fp->_IO_save_base = NULL;
}

_IO_un_link ((struct _IO_FILE_plus *) fp);

#ifdef _IO_MTSAFE_IO
if (fp->_lock != NULL)
_IO_lock_fini (*fp->_lock);
#endif
}

释放缓冲区并再次调用_IO_un_link

0x3

回到_IO_new_fclose

在设置一些参数之后最后调用_IO_deallocate_file

其位于libio/libioP.h

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Free the FILE object FP, except for the three standard stream objects
   and (in builds with GLIBC_2_0 compatibility) legacy files, which are
   left untouched. */
static inline void
_IO_deallocate_file (FILE *fp)
{
/* The current stream variables. */
if (fp == (FILE *) &_IO_2_1_stdin_ || fp == (FILE *) &_IO_2_1_stdout_
|| fp == (FILE *) &_IO_2_1_stderr_)
return;
#if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
if (_IO_legacy_file (fp))
return;
#endif
free (fp);
}

检查要关闭的_IO_FILE是否为_IO_2_1_stdin_、_IO_2_1_stdout_或_IO_2_1_stderr_

是则直接返回,因为这三个流并不在堆上

否则的话该文件流就应该位于堆上,对其进行free

大致就是这么个流程

fflush

fflush

刷新文件流函数,将write(输出)缓冲区中的未写入数据刷新到文件中

给定需要刷新的 FILE 指针,刷新成功返回 0,失败返回 EOF(-1)。

1
int fflush ( FILE * stream );

如果当前的 stream 是为写入打开的,或者为了更新打开的且最后一个 io 操作是 output,那么任何在 outbuffer 中未写入的数据都将会被写入到文件中;如果 stream 是空指针,那么所有的 stream 将会被 flush。

fflush由_IO_fflush实现,位于libio/iofflush.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Flush the stream FP, or every stream when FP is NULL.
   For a specific stream the sync hook is run under the stream lock.
   Returns 0 on success and EOF when _IO_SYNC reports a non-zero result;
   for NULL the result of _IO_flush_all is returned. */
int
_IO_fflush (FILE *fp)
{
if (fp == NULL)
return _IO_flush_all ();
else
{
int result;
CHECK_FILE (fp, EOF);
_IO_acquire_lock (fp);
result = _IO_SYNC (fp) ? EOF : 0;
_IO_release_lock (fp);
return result;
}
}
libc_hidden_def (_IO_fflush)

0x1all分支

_IO_flush_all位于libio/genops.c

1
2
3
4
5
6
7
/* Flush all streams with per-stream locking enabled; returns the result
   of _IO_flush_all_lockp. */
int
_IO_flush_all (void)
{
/* We want locking. */
return _IO_flush_all_lockp (1);
}
libc_hidden_def (_IO_flush_all)

纯调用_IO_flush_all_lockp(1),不过高版本开始取消了_IO_flush_all_lockp,其所有功能由_IO_flush_all直接完成

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/* Walk the stream list starting at _IO_list_all and call
   _IO_OVERFLOW (fp, EOF) on every stream that has pending output, either
   narrow (_IO_write_ptr > _IO_write_base) or wide (the same test on
   _wide_data).  Each stream is locked while it is examined when DO_LOCK is
   non-zero.  Returns 0, or EOF if any of the overflow calls returned EOF. */
int
_IO_flush_all_lockp (int do_lock)
{
int result = 0;
FILE *fp;

#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);// take the list lock
#endif

for (fp = (FILE *) _IO_list_all; fp != NULL; fp = fp->_chain)
{
run_fp = fp;
if (do_lock)
_IO_flockfile (fp);// lock this stream

if (((fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base)
|| (_IO_vtable_offset (fp) == 0
&& fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr
> fp->_wide_data->_IO_write_base))
)
&& _IO_OVERFLOW (fp, EOF) == EOF)
result = EOF;

if (do_lock)
_IO_funlockfile (fp);
run_fp = NULL;
}

#ifdef _IO_MTSAFE_IO
_IO_lock_unlock (list_all_lock);// release the list lock
_IO_cleanup_region_end (0);
#endif

return result;
}

检查当前 FILE 对象的情况,如果是以下两种情况:

  • (fp->_mode <= 0 && fp->_IO_write_ptr > fp->_IO_write_base)非宽字符
  • (_IO_vtable_offset (fp) == 0 && fp->_mode > 0 && (fp->_wide_data->_IO_write_ptr > fp->_wide_data->_IO_write_base))宽字符

则会调用_IO_OVERFLOW (fp, EOF)

_IO_OVERFLOW (fp, EOF)就是调用对应 fp 的函数指针实现对应的写入功能,最后实际调用到了_IO_do_write,将 f->_IO_write_base 开始,长度为(f->_IO_write_ptr - f->_IO_write_base)的数据写入文件,之前有提到过,这里不展开

0x2指定fp分支

1
2
3
4
5
6
7
8
9
else
{
int result;
CHECK_FILE (fp, EOF);
_IO_acquire_lock (fp);//上锁
result = _IO_SYNC (fp) ? EOF : 0;
_IO_release_lock (fp);
return result;
}

1.CHECK_FILE

检查 FILE 对象是否合法,包括是否空指针,_flags 是否在合法范围内。

1
2
3
4
5
6
7
8
9
10
11
12
/* Validate a FILE pointer before it is used (quoted from source lines
   865-876 of the glibc file being discussed).
   In IO_DEBUG builds a NULL pointer, or a _flags word whose
   _IO_MAGIC_MASK bits differ from _IO_MAGIC, sets errno to EINVAL and
   makes the enclosing function return RET.  In all other builds the macro
   expands to an empty statement and performs no check. */
#ifdef IO_DEBUG
# define CHECK_FILE(FILE, RET) do { \
if ((FILE) == NULL \
|| ((FILE)->_flags & _IO_MAGIC_MASK) != _IO_MAGIC) \
{ \
__set_errno (EINVAL); \
return RET; \
} \
} while (0)
#else
# define CHECK_FILE(FILE, RET) do { } while (0)
#endif

2.sync

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* Sync hook for the stream FP.
   Flushes pending output first, then, if the read pointer and the end of
   the read area differ, seeks the file by that difference relative to the
   current position.  A failed seek is ignored when errno is ESPIPE.
   Returns 0 on success and EOF if the flush or the seek fails; on success
   the cached file offset is marked unknown. */
int
_IO_new_file_sync (FILE *fp)
{
ssize_t delta;
int retval = 0;

/* char* ptr = cur_ptr(); */
if (fp->_IO_write_ptr > fp->_IO_write_base)
if (_IO_do_flush(fp)) return EOF;
/* delta is the distance from _IO_read_end back to _IO_read_ptr (zero or
   negative when data was read ahead). */
delta = fp->_IO_read_ptr - fp->_IO_read_end;
if (delta != 0)
{
off64_t new_pos = _IO_SYSSEEK (fp, delta, 1);
if (new_pos != (off64_t) EOF)
fp->_IO_read_end = fp->_IO_read_ptr;
else if (errno == ESPIPE)
; /* Ignore error from unseekable devices. */
else
retval = EOF;
}
if (retval != EOF)
fp->_offset = _IO_pos_BAD;
/* FIXME: Cleanup - can this be shared? */
/* setg(base(), ptr, ptr); */
return retval;
}
libc_hidden_ver (_IO_new_file_sync, _IO_file_sync)

write缓冲区中如果有数据未写入

调用_IO_do_flush(fp)

1
2
3
4
5
6
7
/* Flush the pending output of stream _f: for _mode <= 0 pass the bytes
   between _IO_write_base and _IO_write_ptr to _IO_do_write, otherwise pass
   the corresponding range of _wide_data to _IO_wdo_write. */
#define _IO_do_flush(_f) \
((_f)->_mode <= 0 \
? _IO_do_write(_f, (_f)->_IO_write_base, \
(_f)->_IO_write_ptr-(_f)->_IO_write_base) \
: _IO_wdo_write(_f, (_f)->_wide_data->_IO_write_base, \
((_f)->_wide_data->_IO_write_ptr \
- (_f)->_wide_data->_IO_write_base)))

setbuf

setbuf指定对应文件流 stream 的 IO 操作 buffer,此时该 stream 就一定是使用缓存 buffer 的,或者如果 buffer 指针为 NULL,那么此时的 stream 会被禁用缓存 buffer。

  • 使用缓存 buffer:写入的数据先存放在缓存 buffer 中,文件内容不一定与之同步;当缓冲区写满、调用 fflush 或 fclose 时,缓存 buffer 中的信息才会同步到文件中;
  • 不使用缓存 buffer:那么写入的信息将会尽可能快地同步到文件中。

注意:传入的 buffer 大小至少应为 BUFSIZ(setbuf 固定以 BUFSIZ 作为 size 调用 _IO_setbuffer)

1
void setbuf ( FILE * stream, char * buffer );

假设两个 FILE 对象,其中一个设置为 buffer,另一个设置为 no buffer,那么 pFile1 只有再调用 fflush(pFile1)之后信息才完全写入文件,而 pFile2 的信息是尽可能快地写入文件,不必使用 fflush,当然,最后 fclose 之后,buffer 中的信息都会同步到文件中.

setbuf位于libio/setbuf.c

1
2
3
4
5
/* Set BUF as the buffer of stream FP by forwarding to _IO_setbuffer with
   the fixed size BUFSIZ. */
void
setbuf (FILE *fp, char *buf)
{
_IO_setbuffer (fp, buf, BUFSIZ);
}

仅是调用 _IO_setbuffer,位于libio/iosetbuffer.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* Install BUF (SIZE bytes) as the buffer of stream FP, or switch to a
   zero-sized buffer when BUF is NULL.  Line buffering is always turned
   off.  The narrow setbuf hook is called first; the wide hook is called as
   well when the condition before _IO_WSETBUF holds. */
void
_IO_setbuffer (FILE *fp, char *buf, size_t size)// when reached through setbuf, size is always BUFSIZ and not chosen by the caller
{
CHECK_FILE (fp, );// validate fp
_IO_acquire_lock (fp);// lock the stream
fp->_flags &= ~_IO_LINE_BUF;// clear the line-buffered flag
if (!buf)// no buffer supplied
size = 0;
(void) _IO_SETBUF (fp, buf, size);// per the surrounding article this dispatches to _IO_new_file_setbuf for file streams
if (_IO_vtable_offset (fp) == 0 && fp->_mode == 0 && _IO_CHECK_WIDE (fp))
/* We also have to set the buffer using the wide char function. */
(void) _IO_WSETBUF (fp, buf, size);// wide-character counterpart, not covered here
_IO_release_lock (fp);// release the lock
}
libc_hidden_def (_IO_setbuffer)
1
2
3
4
5
6
7
8
/* Validate a FILE pointer: when it is NULL, or the _IO_MAGIC_MASK bits of
   its _flags differ from _IO_MAGIC, set errno to EINVAL and make the
   enclosing function return RET. */
# define CHECK_FILE(FILE, RET) do {				\
if ((FILE) == NULL \
|| ((FILE)->_flags & _IO_MAGIC_MASK) != _IO_MAGIC) \
{ \
__set_errno (EINVAL); \
return RET; \
} \
} while (0)

0x1

先看_IO_new_file_setbuf

1
2
3
4
5
6
7
8
9
10
11
12
13
/* setbuf hook for file streams: let _IO_default_setbuf install the buffer
   P of LEN bytes, then point all write and read pointers at the start of
   the new buffer.  Returns FP, or NULL if _IO_default_setbuf fails. */
FILE *
_IO_new_file_setbuf (FILE *fp, char *p, ssize_t len)
{
if (_IO_default_setbuf (fp, p, len) == NULL)
return NULL;

fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;// reset the write pointers to the buffer start
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);// reset the read pointers to the buffer start

return fp;
}
libc_hidden_ver (_IO_new_file_setbuf, _IO_file_setbuf)

又先调用_IO_default_setbuf

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Generic setbuf: sync the stream, then either switch it to unbuffered
   mode using the one-byte _shortbuf (P is NULL or LEN is 0) or install the
   caller's buffer [P, P+LEN).  All read and write pointers are cleared
   afterwards.  Returns FP, or NULL if _IO_SYNC returns EOF. */
FILE *
_IO_default_setbuf (FILE *fp, char *p, ssize_t len)
{
if (_IO_SYNC (fp) == EOF)
return NULL;
if (p == NULL || len == 0)
{
fp->_flags |= _IO_UNBUFFERED;// mark the stream unbuffered
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
else
{
fp->_flags &= ~_IO_UNBUFFERED;// clear the unbuffered flag
_IO_setb (fp, p, p+len, 0);
}
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = 0;
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_read_end = 0;// clear the read and write pointers
return fp;
}

_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);

_IO_setb (fp, p, p+len, 0);

1
2
3
4
5
6
7
8
9
10
11
12
13
/* Make [B, EB) the buffer of stream F.  A previous buffer is freed first
   unless the stream is flagged _IO_USER_BUF.  A non-zero A clears
   _IO_USER_BUF for the new buffer, A == 0 sets it. */
void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))// an old buffer exists and is not flagged _IO_USER_BUF
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;// install the new buffer bounds
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;// flagged buffers are skipped by the free above on a later call
}
libc_hidden_def (_IO_setb)

总结就是修改缓冲区及相应标志位再重置rw指针

setvbuf

setvbuf

改变文件流 buffer 函数

指定对应文件流 stream 的 IO 操作 buffer,同时设定该块缓存 buffer 的操作 mode 和 size 大小,如果 buffer 指针是空指针,那么 setvbuf 函数将会自动分配一块默认大小的 buffer 作为缓存使用。

只有buf不为null时,size才有效

1
int setvbuf ( FILE * stream, char * buffer, int mode, size_t size );

注意:上面的 mode 有以下的选择

  • _IOFBF:Full Buffering:输出操作中,数据在 buffer 写满后写入物理文件;输入操作中,buffer 只有在全为空时才被填写,填充的可能是多行数据;
  • _IOLBF:Line Buffering:输出操作中,数据在新的一行插入 FILE 流对象或 buffer 写满时触发写入物理文件;输入操作中,buffer 只有在 buffer 全为空时,写入新的一行到 buffer 中。
  • _IONBF:No Buffering:不使用缓存 buffer,所有输入输出操作都尽可能快地写入物理文件,当前模式下,buffer 和 size 参数将会被忽略
1
2
3
4
/* The possibilities for the third argument to `setvbuf'.  */
#define _IOFBF 0 /* Fully buffered. */
#define _IOLBF 1 /* Line buffered. */
#define _IONBF 2 /* No buffering. */

注意:setvbuf 的调用时机,在一个文件流对象绑定到一个打开的文件之后,对该文件流对象进行文件读写操作之前。

可以看如下的例子:

打开了一个 pFile 对象,并将其 buffer 设置为 NULL,mode 设置为_IOFBF。注意按照下面的源码,buf 为 NULL 时传入的 size(1024)并不会被使用:缓冲区由_IO_DOALLOCATE 自动分配,大小由_IO_file_doallocate 决定(默认 BUFSIZ,或更小的 st_blksize)。在之后的写入过程中,数据会先积累在该缓冲区里,写满后才触发一次写入物理文件。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* setvbuf example */
#include <stdio.h>

/* Open myfile.txt for writing and request full buffering before any other
   operation is performed on the stream.
   Returns 0 on success and 1 if the file cannot be opened, the buffering
   mode cannot be set, or closing the stream fails. */
int main ()
{
FILE *pFile;
int status = 0;

pFile=fopen ("myfile.txt","w");
/* fopen returns NULL on failure; passing that on to setvbuf or fclose
   would dereference a null pointer. */
if (pFile == NULL)
{
perror ("fopen");
return 1;
}

if (setvbuf ( pFile , NULL , _IOFBF , 1024 ) != 0)
status = 1;

// File operations here

/* fclose flushes buffered output, so its result matters for a stream
   opened for writing. */
if (fclose (pFile) != 0)
status = 1;

return status;
}

setvbuf调用的是_IO_setvbuf
函数位于libio/iosetvbuf.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/* Set the buffering MODE of stream FP and optionally install BUF of SIZE
   bytes as its buffer.
   _IOFBF and _IOLBF with a NULL BUF return without calling _IO_SETBUF, so
   SIZE is not used in those cases; _IOFBF additionally allocates a buffer
   right away when the stream has none.  _IONBF always discards BUF and
   SIZE.  Returns 0 on success and EOF on an unknown mode, an allocation
   failure or a failing _IO_SETBUF. */
int
_IO_setvbuf (FILE *fp, char *buf, int mode, size_t size)
{
int result;
CHECK_FILE (fp, EOF);// validate fp
_IO_acquire_lock (fp);// lock the stream
switch (mode)
{
case _IOFBF:// fully buffered
fp->_flags &= ~(_IO_LINE_BUF|_IO_UNBUFFERED);// clear both the line-buffered and the unbuffered flag
if (buf == NULL)// no buffer supplied by the caller
{
if (fp->_IO_buf_base == NULL)
{
/* There is no flag to distinguish between "fully buffered
mode has been explicitly set" as opposed to "line
buffering has not been explicitly set". In both
cases, _IO_LINE_BUF is off. If this is a tty, and
_IO_filedoalloc later gets called, it cannot know if
it should set the _IO_LINE_BUF flag (because that is
the default), or not (because we have explicitly asked
for fully buffered mode). So we make sure a buffer
gets allocated now, and explicitly turn off line
buffering.

A possibly cleaner alternative would be to add an
extra flag, but then flags are a finite resource. */
if (_IO_DOALLOCATE (fp) < 0)// allocate a buffer for the stream now
{
result = EOF;
goto unlock_return;
}
fp->_flags &= ~_IO_LINE_BUF;// the allocation may have turned line buffering on (for a tty); turn it off again, see the comment above
}
result = 0;
goto unlock_return;
}
break;
case _IOLBF:
fp->_flags &= ~_IO_UNBUFFERED;// clear the unbuffered flag
fp->_flags |= _IO_LINE_BUF;// turn line buffering on
if (buf == NULL)
{
result = 0;
goto unlock_return;
}
break;
case _IONBF:
fp->_flags &= ~_IO_LINE_BUF;// clear the line-buffered flag
fp->_flags |= _IO_UNBUFFERED;// set the unbuffered flag
buf = NULL;
size = 0;
break;
default:
result = EOF;
goto unlock_return;
}
result = _IO_SETBUF (fp, buf, size) == NULL ? EOF : 0;// install the buffer through the stream's setbuf hook

unlock_return:
_IO_release_lock (fp);
return result;
}

0x1_IOFBF

  • 首先将_IO_LINE_BUF 和_IO_UNBUFFERED 位置为 0,因为目前是要求 full buffering 的;
  • 然后我们检查输入参数 buf,如果为空的话,我们要尝试进行分配 buffer 分配;
  • 再次我们检查 fp->_IO_buf_base 参数,这里指向的是 fp 预先分配的缓存 buffer,只有这里也为空,那就说明完全没有缓存 buffer 可用,那我们就真的需要进行分配了;
  • 调用_IO_DOALLOCATE 对 fp 进行 buffer 分配
  • 根据分配 buffer 是否失败决定是直接返回错误 EOF,还是重新只将_IO_LINE_BUF 置为 0
  • 注意了,上面都是 buf 为空,需要重新分配的情况,如果 buf 不为空,那么我们会跳到
1
result = _IO_SETBUF (fp, buf, size) == NULL ? EOF : 0;

的执行中,进行 buf 设置;如果 fp->_IO_buf_base 不等于 NULL,那我们实际上是默认使用这块 buffer 的,返回 0,退出函数

看一下其中的doalloc函数,位于libio/filedoallocate.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Allocate a buffer for the stream FP and install it with _IO_setb.
   The size starts at BUFSIZ and is lowered to st_blksize when stat
   information is available and reports a smaller positive block size.
   A character device that is a tty also gets _IO_LINE_BUF set.
   Returns 1 on success and EOF if malloc fails. */
int
_IO_file_doallocate (FILE *fp)
{
size_t size;
char *p;
struct stat64 st;

size = BUFSIZ;// default size
if (fp->_fileno >= 0 && __builtin_expect (_IO_SYSSTAT (fp, &st), 0) >= 0)
{
if (S_ISCHR (st.st_mode))
{
/* Possibly a tty. */
if (
#ifdef DEV_TTY_P
DEV_TTY_P (&st) ||
#endif
local_isatty (fp->_fileno))
fp->_flags |= _IO_LINE_BUF;// a tty is line buffered
}
#if defined _STATBUF_ST_BLKSIZE
if (st.st_blksize > 0 && st.st_blksize < BUFSIZ)
size = st.st_blksize;
#endif
}
p = malloc (size);// allocate the buffer
if (__glibc_unlikely (p == NULL))
return EOF;
_IO_setb (fp, p, p + size, 1);// install it as the stream buffer; the final argument 1 is _IO_setb's "a" flag
return 1;
}
libc_hidden_def (_IO_file_doallocate)

这个函数的核心作用就是为 fp->_IO_buf_base 分配一块合理大小的 buffer 用作缓存,我们来看看它的一些具体逻辑:

  • 默认 size 大小是 size = BUFSIZ (8192 字节)
  • 对 fp 指针状态进行设置:仅当该文件是字符设备且为 tty 时,才将_IO_LINE_BUF 置位;
  • 通过获取该 IO 流的 stat 信息 st,决定是否有必要采用其中 st_blksize 更新 size(主要是考虑使用一个比 8192 更小的 size,分配足够的就行,不一定要最大的 size)
  • 通过 malloc 分配对应大小的 buffer,然后调用_IO_setb 将 fp->_IO_buf_base 设置为刚才申请的地址

0x2_IOLBF

这种情况是按行使用 buffer,主要做了以下操作:

  • 设置 tag,将_IO_UNBUFFERED 置 0,将_IO_LINE_BUF 置位;
  • 如果入参 buf 为空,那就直接返回 0,结束函数;否则等待执行_IO_SETBUF (fp, buf, size)

思考:这里为什么不重新检查 fp->_IO_buf_base 然后分配内存呢?

从上一种情况中可以看到,_IO_file_doallocate 只会(在 tty 上)置位_IO_LINE_BUF,从不清除它;这里已经显式置位了_IO_LINE_BUF,之后缓冲区延迟分配时该标志仍然保留,所以无需像_IOFBF 那样立即检查 fp->_IO_buf_base 并分配 buffer

0x3_IONBF

这种情况的操作就更为简单了,禁用了 buffer,我们将_IO_LINE_BUF 置 0,_IO_UNBUFFERED 置位,然后将入参 buf 置为 NULL,size 置为 0,等待调用_IO_SETBUF (fp, buf, size)